Source code for allensdk.brain_observatory.ecephys.stimulus_table.naming_utilities

import re
import warnings

import numpy as np

# Matches names of the form "gabor_<number>[_]deg[_<digits>ms]", for example
# "gabor_20_deg_250ms". The single capture group holds the (possibly decimal)
# number in front of "deg" -- presumably the gabor diameter in degrees, going
# by the constant's name.
GABOR_DIAMETER_RE = re.compile(
    r"gabor_(\d*\.{0,1}\d*)_{0,1}deg(?:_\d+ms){0,1}"
)

# Matches "natural_movie_<number>" with optional "_shuffled" and
# "_more_repeats" suffixes. The named group "number" captures either a run of
# digits or one of the english words "one" .. "nine".
GENERIC_MOVIE_RE = re.compile(
    r"natural_movie_"
    + r"(?P<number>\d+|one|two|three|four|five|six|seven|eight|nine)"
    + r"(_shuffled){0,1}(_more_repeats){0,1}"
)
# Lookup from a single non-zero digit (as a string) to its english word.
DIGIT_NAMES = {
    "1": "one",
    "2": "two",
    "3": "three",
    "4": "four",
    "5": "five",
    "6": "six",
    "7": "seven",
    "8": "eight",
    "9": "nine",
}
# Matches a shuffled-movie name that carries no movie number at all.
SHUFFLED_MOVIE_RE = re.compile(r"natural_movie_shuffled")
# Captures any run of decimal digits in the named group "number".
NUMERAL_RE = re.compile(r"(?P<number>\d+)")


def drop_empty_columns(table):
    """Drop, in place, every column of the stimulus table that holds only
    missing values.

    Parameters
    ----------
    table : pd.DataFrame
        the stimulus table; it is modified in place

    Returns
    -------
    table : pd.DataFrame
        the same object that was passed in, minus its all-nan columns

    """
    all_nan = [name for name in table.columns if table[name].isna().all()]
    table.drop(columns=all_nan, inplace=True)
    return table
def collapse_columns(table):
    """Fold columns whose names differ only by capitalization into one.

    For each column, its upper-cased and then its capitalized spelling are
    looked up among the table's columns. The first variant that exists and
    never has a value in the same row as the column is merged: its values
    fill the column's nans, and the variant is dropped. The merge is
    deliberately conservative - existing values are never overwritten.

    Parameters
    ----------
    table : pd.DataFrame
        the stimulus table; it is modified in place

    Returns
    -------
    table : pd.DataFrame
        the same object that was passed in, with merged variants removed

    """
    existing = set(table.columns)
    absorbed = []

    for name in table.columns:
        for variant in (name.upper(), name.capitalize()):
            if variant == name or variant not in existing:
                continue

            name_filled = table[name].notna()
            variant_filled = table[variant].notna()

            # any row populated in both columns is a conflict: leave both
            if (name_filled & variant_filled).any():
                continue

            fill_rows = ~name_filled & variant_filled
            absorbed.append(variant)
            table.loc[fill_rows, name] = table[variant][fill_rows]
            break

    table.drop(columns=absorbed, inplace=True)
    return table
def add_number_to_shuffled_movie(
    table,
    natural_movie_re=GENERIC_MOVIE_RE,
    template_re=SHUFFLED_MOVIE_RE,
    stim_colname="stimulus_name",
    template="natural_movie_{}_shuffled",
    tmp_colname="__movie_number__",
):
    """Insert the session's movie number into un-numbered shuffled-movie
    stimulus names.

    The movie number is read from the other stimulus names in the table
    (those matching ``natural_movie_re``). Exactly one distinct number must
    be present; it is then substituted into ``template`` and written over
    every stimulus name that matches ``template_re``.

    Parameters
    ----------
    table : pd.DataFrame
        the incoming stimulus table
    natural_movie_re : re.Pattern, optional
        regex with a named group "number" used to find the movie number
    template_re : re.Pattern, optional
        regex identifying the stimulus names that need a number inserted
    stim_colname : str, optional
        the name of the dataframe column that contains stimulus names
    template : str, optional
        format string with one positional field for the movie number
    tmp_colname : str, optional
        unused; retained so existing callers that pass it keep working
        (earlier versions staged the extracted numbers in a scratch column
        of this name)

    Returns
    -------
    table : pd.DataFrame
        the input table itself if no stimulus name matches ``template_re``,
        otherwise a copy with the matching names rewritten

    Raises
    ------
    ValueError
        if the table does not contain exactly one distinct movie number

    """
    # na=False: a row without a stimulus name can never be a shuffled movie
    if not table[stim_colname].str.contains(template_re, na=False).any():
        return table

    table = table.copy()
    names = table[stim_colname]

    numbers = names.str.extract(natural_movie_re, expand=True)["number"]
    unique_numbers = list(numbers.dropna().unique())
    if len(unique_numbers) != 1:
        raise ValueError(
            "unable to uniquely determine a movie number for this session. "
            + f"Candidates: {unique_numbers}"
        )
    numbered_name = template.format(unique_numbers[0])

    def renamer(name):
        # non-string entries (e.g. nan) and non-matching names pass through
        if isinstance(name, str) and template_re.match(name):
            return numbered_name
        return name

    table[stim_colname] = names.map(renamer)
    return table
def standardize_movie_numbers(
    table,
    movie_re=GENERIC_MOVIE_RE,
    numeral_re=NUMERAL_RE,
    digit_names=DIGIT_NAMES,
    stim_colname="stimulus_name",
):
    """Natural movie stimuli in visual coding are numbered using words, like
    "natural_movie_two" rather than "natural_movie_2". This function ensures
    that all of the natural movie stimuli in an experiment are named by that
    convention.

    Parameters
    ----------
    table : pd.DataFrame
        the incoming stimulus table; its stimulus-name column is rewritten
        in place
    movie_re : re.Pattern, optional
        regex that matches movie stimulus names
    numeral_re : re.Pattern, optional
        regex that extracts movie numbers from stimulus names. Must define a
        named group "number".
    digit_names : dict, optional
        map from numerals to english words
    stim_colname : str, optional
        the name of the dataframe column that contains stimulus names

    Returns
    -------
    table : pd.DataFrame
        the stimulus table with movie numerals having been mapped to english
        words

    Raises
    ------
    KeyError
        if a movie name contains a numeral that is not a key of
        ``digit_names``

    """

    def replace(match_obj):
        return digit_names[match_obj["number"]]

    # pandas warns when a pattern handed to str.contains has capture groups.
    # Silence that only for the duration of this call: installing the filters
    # globally would keep hiding every UserWarning in the process afterwards.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", "This pattern has match groups")
        warnings.filterwarnings("ignore", category=UserWarning)

        movie_rows = table[stim_colname].str.contains(movie_re, na=False)
        table.loc[movie_rows, stim_colname] = table.loc[
            movie_rows, stim_colname
        ].str.replace(numeral_re, replace, regex=True)

    return table
def map_stimulus_names(table, name_map=None, stim_colname="stimulus_name"):
    """Applies a mapping to the stimulus names in a stimulus table.

    In addition to the supplied mapping, missing (nan) stimulus names are
    always replaced with "spontaneous".

    Parameters
    ----------
    table : pd.DataFrame
        the input stimulus table; its stimulus-name column is overwritten
    name_map : dict, optional
        rename the stimuli according to this mapping. If None, the table is
        returned untouched. The mapping itself is never modified.
    stim_colname: str, optional
        look in this column for stimulus names

    Returns
    -------
    table : pd.DataFrame
        the same object that was passed in, with stimulus names replaced

    """
    if name_map is None:
        return table

    # Build a private copy so the caller's dict is left exactly as supplied,
    # even if it already has a nan entry or if replace() raises.
    full_map = dict(name_map)
    full_map[np.nan] = "spontaneous"

    table[stim_colname] = table[stim_colname].replace(to_replace=full_map)
    return table
def map_column_names(table, name_map=None, ignore_case=True):
    """Rename the columns of a stimulus table.

    Parameters
    ----------
    table : pd.DataFrame
        the input stimulus table; it is not modified
    name_map : dict, optional
        map from current column names to new column names. If None, no
        column is renamed.
    ignore_case : bool, optional
        if True, a column is renamed when its name equals a key of
        ``name_map`` up to letter case. If False, names must match exactly.

    Returns
    -------
    output : pd.DataFrame
        a new table with the renamed columns

    """
    if name_map is None:
        # DataFrame.rename rejects columns=None; a copy keeps the
        # "returns a new table" contract of the other code paths
        return table.copy()

    if not ignore_case:
        return table.rename(columns=name_map)

    lowered = {
        (key.lower() if isinstance(key, str) else key): value
        for key, value in name_map.items()
    }

    def mapper(name):
        # lower-case the column name too, otherwise only columns that are
        # already lower case could ever match the lower-cased keys
        lookup = name.lower() if isinstance(name, str) else name
        return lowered.get(lookup, name)

    return table.rename(columns=mapper)