Source code for allensdk.brain_observatory.ecephys.stimulus_table.naming_utilities

import re
import warnings

import numpy as np

# Matches gabor stimulus names of the form "gabor_<diameter>deg". The
# (possibly decimal) diameter is the only capture group; the underscore before
# "deg" and a trailing "_<digits>ms" suffix are both optional.
GABOR_DIAMETER_RE = re.compile(
    r"gabor_(\d*\.{0,1}\d*)_{0,1}deg(?:_\d+ms){0,1}"
)

# Matches "natural_movie_<number>", where <number> (named group "number") is
# either a run of digits or one of the english words "one" through "nine".
# Optional "_shuffled" and "_more_repeats" suffixes are captured as two
# further (unnamed) groups.
GENERIC_MOVIE_RE = re.compile(
    r"natural_movie_"
    + r"(?P<number>\d+|one|two|three|four|five|six|seven|eight|nine)"
    + r"(_shuffled){0,1}(_more_repeats){0,1}"
)
# Maps single-digit numeral strings to their english names. Note there is no
# entry for "0" and none for multi-digit numerals.
DIGIT_NAMES = {
    "1": "one",
    "2": "two",
    "3": "three",
    "4": "four",
    "5": "five",
    "6": "six",
    "7": "seven",
    "8": "eight",
    "9": "nine",
}
# Matches the literal "natural_movie_shuffled", i.e. a shuffled movie name
# that carries no movie number.
SHUFFLED_MOVIE_RE = re.compile(r"natural_movie_shuffled")
# Matches a run of one or more digits, exposed as the named group "number".
NUMERAL_RE = re.compile(r"(?P<number>\d+)")



[docs]
def drop_empty_columns(table):
    """Drop, in place, every column of the stimulus table that is all nan.

    Parameters
    ----------
    table : pd.DataFrame
        the stimulus table; it is modified in place

    Returns
    -------
    table : pd.DataFrame
        the same object that was passed in, minus its all-nan columns

    """

    all_nan_columns = [
        name for name in table.columns if table[name].isna().all()
    ]
    table.drop(columns=all_nan_columns, inplace=True)
    return table




[docs]
def collapse_columns(table):
    """Fold together columns whose names differ only by capitalization.

    For each column, its upper-cased and capitalized spellings are looked up
    among the table's columns. When such a variant exists and the two columns
    never both hold a value on the same row, the variant's values are copied
    into the nan slots of the column and the variant is dropped. Existing
    non-nan values are never overwritten.

    Parameters
    ----------
    table : pd.DataFrame
        the stimulus table; it is modified in place

    Returns
    -------
    table : pd.DataFrame
        the same object that was passed in, with merged variants removed

    """

    known_names = set(table.columns)
    redundant = []

    for name in table.columns:
        for variant in (name.upper(), name.capitalize()):
            if variant == name or variant not in known_names:
                continue

            name_filled = table[name].notna()
            variant_filled = table[variant].notna()

            # Both columns carry data on at least one shared row: merging
            # would have to discard a value, so leave this pair alone.
            if (name_filled & variant_filled).any():
                continue

            fill_rows = variant_filled & ~name_filled
            redundant.append(variant)
            table.loc[fill_rows, name] = table[variant][fill_rows]
            # At most one variant is merged into any given column.
            break

    table.drop(columns=redundant, inplace=True)
    return table




[docs]
def add_number_to_shuffled_movie(
    table,
    natural_movie_re=GENERIC_MOVIE_RE,
    template_re=SHUFFLED_MOVIE_RE,
    stim_colname="stimulus_name",
    template="natural_movie_{}_shuffled",
    tmp_colname="__movie_number__",
):
    """Insert the session's movie number into unnumbered shuffled-movie names.

    Stimulus names matched by ``template_re`` are rewritten using
    ``template``, filled in with the single movie number that
    ``natural_movie_re`` extracts from the other stimulus names in the table.

    Parameters
    ----------
    table : pd.DataFrame
        the incoming stimulus table
    natural_movie_re : re.Pattern, optional
        regex used to extract movie numbers from stimulus names. Must define
        a named group called "number".
    template_re : re.Pattern, optional
        compiled regex identifying the stimulus names to rewrite. Rows are
        rewritten when it matches at the start of the name.
    stim_colname : str, optional
        the name of the dataframe column that contains stimulus names
    template : str, optional
        format string for the new name; receives the movie number as its
        only positional argument
    tmp_colname : str, optional
        no longer used (no temporary column is written to the table);
        accepted only so that existing callers keep working

    Returns
    -------
    table : pd.DataFrame
        the input table itself when no stimulus name matches ``template_re``,
        otherwise a copy with the matching names rewritten

    Raises
    ------
    ValueError
        if the stimulus names do not yield exactly one distinct movie number

    """

    # Use the caller-supplied pattern (not the module default) and treat
    # missing names as non-matching.
    if not table[stim_colname].str.contains(template_re, na=False).any():
        return table
    table = table.copy()

    movie_numbers = table[stim_colname].str.extract(
        natural_movie_re, expand=True
    )["number"]

    unique_numbers = list(movie_numbers.dropna().unique())
    if len(unique_numbers) != 1:
        raise ValueError(
            "unable to uniquely determine a movie number for this session. "
            + f"Candidates: {unique_numbers}"
        )
    movie_number = unique_numbers[0]

    def renamer(name):
        # Non-string entries (e.g. nan) pass through untouched.
        if isinstance(name, str) and template_re.match(name):
            return template.format(movie_number)
        return name

    table[stim_colname] = table[stim_colname].map(renamer)
    return table




[docs]
def standardize_movie_numbers(
    table,
    movie_re=GENERIC_MOVIE_RE,
    numeral_re=NUMERAL_RE,
    digit_names=DIGIT_NAMES,
    stim_colname="stimulus_name",
):
    """Natural movie stimuli in visual coding are numbered using words, like
    "natural_movie_two" rather than "natural_movie_2". This function ensures
    that all of the natural movie stimuli in an experiment are named by that
    convention.

    Parameters
    ----------
    table : pd.DataFrame
        the incoming stimulus table; it is modified in place
    movie_re : re.Pattern, optional
        regex that matches movie stimulus names
    numeral_re : re.Pattern, optional
        regex that extracts movie numbers from stimulus names. Must define a
        named group called "number".
    digit_names : dict, optional
        map from numerals to english words
    stim_colname : str, optional
        the name of the dataframe column that contains stimulus names

    Returns
    -------
    table : pd.DataFrame
        the stimulus table with movie numerals having been mapped to english
        words

    Raises
    ------
    KeyError
        if a numeral found in a movie stimulus name is not a key of
        ``digit_names``

    """

    def replace(match_obj):
        return digit_names[match_obj["number"]]

    # pandas warns when a pattern passed to str.contains has capture groups.
    # Silence that only for the duration of these calls, rather than
    # installing a process-wide filter that would hide every later
    # UserWarning raised anywhere in the program.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)

        movie_rows = table[stim_colname].str.contains(movie_re, na=False)
        table.loc[movie_rows, stim_colname] = table.loc[
            movie_rows, stim_colname
        ].str.replace(numeral_re, replace, regex=True)

    return table




[docs]
def map_stimulus_names(table, name_map=None, stim_colname="stimulus_name"):
    """Applies a mapping to the stimulus names in a stimulus table

    In addition to the entries of ``name_map``, missing (nan) stimulus names
    are always mapped to "spontaneous".

    Parameters
    ----------
    table : pd.DataFrame
        the input stimulus table; its stimulus-name column is overwritten
    name_map : dict, optional
        rename the stimuli according to this mapping. If None, the table is
        returned unchanged. The dictionary itself is never modified.
    stim_colname: str, optional
        look in this column for stimulus names

    Returns
    -------
    table : pd.DataFrame
        the same object that was passed in

    """

    if name_map is None:
        return table

    # Work on a private copy so the caller's dictionary is left exactly as it
    # was, even if it already has a nan entry or the replacement raises.
    mapping = dict(name_map)
    mapping[np.nan] = "spontaneous"

    table[stim_colname] = table[stim_colname].replace(to_replace=mapping)

    return table




[docs]
def map_column_names(table, name_map=None, ignore_case=True):
    """Rename the columns of a stimulus table according to a mapping

    Parameters
    ----------
    table : pd.DataFrame
        the input stimulus table; it is not modified
    name_map : dict, optional
        map from existing column names to new column names. If None, no
        columns are renamed.
    ignore_case : bool, optional
        if True, a column is renamed when its name equals a key of
        ``name_map`` after both have been lower-cased. If False, keys must
        match column names exactly.

    Returns
    -------
    output : pd.DataFrame
        a new table with the renamed columns

    """

    if name_map is None:
        # DataFrame.rename raises when given nothing to rename; return a copy
        # so the result is a new table, as on the renaming paths.
        return table.copy()

    if not ignore_case:
        return table.rename(columns=name_map)

    lowered_map = {
        (key.lower() if isinstance(key, str) else key): value
        for key, value in name_map.items()
    }

    def mapper(colname):
        # Lower-case the column name too; lowering only the keys would stop
        # mixed-case columns from matching at all.
        lookup = colname.lower() if isinstance(colname, str) else colname
        return lowered_map.get(lookup, colname)

    return table.rename(columns=mapper)
Source code for allensdk.brain_observatory.ecephys.stimulus_table.naming_utilities

Contents

Questions