Source code for allensdk.brain_observatory.ecephys.stimulus_table.naming_utilities

import re
import warnings
import functools

import pandas as pd
import numpy as np


GABOR_DIAMETER_RE = re.compile(r"gabor_(\d*\.{0,1}\d*)_{0,1}deg(?:_\d+ms){0,1}")
GENERIC_MOVIE_RE = re.compile(
    r"natural_movie_(?P<number>\d+|one|two|three|four|five|six|seven|eight|nine)(_shuffled){0,1}(_more_repeats){0,1}"
)
DIGIT_NAMES = {
    "1": "one",
    "2": "two",
    "3": "three",
    "4": "four",
    "5": "five",
    "6": "six",
    "7": "seven",
    "8": "eight",
    "9": "nine",
}
SHUFFLED_MOVIE_RE = re.compile(r"natural_movie_shuffled")
NUMERAL_RE = re.compile(r"(?P<number>\d+)")


[docs]def drop_empty_columns(table): """ Remove from the stimulus table columns whose values are all nan """ to_drop = [] for colname in table.columns: if table[colname].isna().all(): to_drop.append(colname) table.drop(columns=to_drop, inplace=True) return table
[docs]def collapse_columns(table): """ merge, where possible, columns that describe the same parameter. This is pretty conservative - it only matches columns by capitalization and it only overrides nans. """ colnames = set(table.columns) matches = [] for col in table.columns: for transformed in (col.upper(), col.capitalize()): if transformed in colnames and col != transformed: col_notna = ~(table[col].isna()) trans_notna = ~(table[transformed].isna()) if (col_notna & trans_notna).sum() != 0: continue mask = ~(col_notna) & (trans_notna) matches.append(transformed) table.loc[mask, col] = table[transformed][mask] break table.drop(columns=matches, inplace=True) return table
[docs]def add_number_to_shuffled_movie( table, natural_movie_re=GENERIC_MOVIE_RE, template_re=SHUFFLED_MOVIE_RE, stim_colname="stimulus_name", template="natural_movie_{}_shuffled", tmp_colname="__movie_number__", ): """ """ if not table[stim_colname].str.contains(SHUFFLED_MOVIE_RE).any(): return table table = table.copy() table[tmp_colname] = table[stim_colname].str.extract(natural_movie_re, expand=True)[ "number" ] unique_numbers = [ item for item in table[tmp_colname].dropna(inplace=False).unique() ] if len(unique_numbers) != 1: raise ValueError( f"unable to uniquely determine a movie number for this session. Candidates: {unique_numbers}" ) movie_number = unique_numbers[0] def renamer(row): if not isinstance(row[stim_colname], str): return row[stim_colname] if not template_re.match(row[stim_colname]): return row[stim_colname] else: return template.format(movie_number) table[stim_colname] = table.apply(renamer, axis=1) table.drop(columns=tmp_colname, inplace=True) return table
[docs]def standardize_movie_numbers( table, movie_re=GENERIC_MOVIE_RE, numeral_re=NUMERAL_RE, digit_names=DIGIT_NAMES, stim_colname="stimulus_name", ): """ Natural movie stimuli in visual coding are numbered using words, like "natural_movie_two" rather than "natural_movie_2". This function ensures that all of the natural movie stimuli in an experiment are named by that convention. Parameters ---------- table : pd.DataFrame the incoming stimulus table movie_re : re.Pattern, optional regex that matches movie stimulus names numeral_re : re.Pattern, optional regex that extracts movie numbers from stimulus names digit_names : dict, optional map from numerals to english words stim_colname : str, optional the name of the dataframe column that contains stimulus names Returns ------- table : pd.DataFrame the stimulus table with movie numerals having been mapped to english words """ replace = lambda match_obj: digit_names[match_obj["number"]] # for some reason pandas really wants us to use the captures warnings.filterwarnings("ignore", "This pattern has match groups") movie_rows = table[stim_colname].str.contains(movie_re, na=False) table.loc[movie_rows, stim_colname] = table.loc[ movie_rows, stim_colname ].str.replace(numeral_re, replace) return table
[docs]def map_stimulus_names(table, name_map=None, stim_colname="stimulus_name"): """ Applies a mappting to the stimulus names in a stimulus table Parameters ---------- table : pd.DataFrame the input stimulus table name_map : dict, optional rename the stimuli according to this mapping stim_colname: str, optional look in this column for stimulus names """ if name_map is None: return table if "" in name_map: name_map[np.nan] = name_map[""] table[stim_colname] = table[stim_colname].replace( to_replace=name_map, inplace=False ) return table
[docs]def map_column_names(table, name_map=None, ignore_case=True): if ignore_case and name_map is not None: name_map = {key.lower(): value for key, value in name_map.items()} mapper = lambda name: name if name.lower() not in name_map else name_map[name.lower()] else: mapper = name_map return table.rename(columns=mapper)