import re
import warnings
import functools
import pandas as pd
import numpy as np
GABOR_DIAMETER_RE = re.compile(r"gabor_(\d*\.{0,1}\d*)_{0,1}deg(?:_\d+ms){0,1}")
GENERIC_MOVIE_RE = re.compile(
r"natural_movie_(?P<number>\d+|one|two|three|four|five|six|seven|eight|nine)(_shuffled){0,1}(_more_repeats){0,1}"
)
DIGIT_NAMES = {
"1": "one",
"2": "two",
"3": "three",
"4": "four",
"5": "five",
"6": "six",
"7": "seven",
"8": "eight",
"9": "nine",
}
SHUFFLED_MOVIE_RE = re.compile(r"natural_movie_shuffled")
NUMERAL_RE = re.compile(r"(?P<number>\d+)")
[docs]def drop_empty_columns(table):
""" Remove from the stimulus table columns whose values are all nan
"""
to_drop = []
for colname in table.columns:
if table[colname].isna().all():
to_drop.append(colname)
table.drop(columns=to_drop, inplace=True)
return table
[docs]def collapse_columns(table):
""" merge, where possible, columns that describe the same parameter. This is pretty conservative - it
only matches columns by capitalization and it only overrides nans.
"""
colnames = set(table.columns)
matches = []
for col in table.columns:
for transformed in (col.upper(), col.capitalize()):
if transformed in colnames and col != transformed:
col_notna = ~(table[col].isna())
trans_notna = ~(table[transformed].isna())
if (col_notna & trans_notna).sum() != 0:
continue
mask = ~(col_notna) & (trans_notna)
matches.append(transformed)
table.loc[mask, col] = table[transformed][mask]
break
table.drop(columns=matches, inplace=True)
return table
[docs]def add_number_to_shuffled_movie(
table,
natural_movie_re=GENERIC_MOVIE_RE,
template_re=SHUFFLED_MOVIE_RE,
stim_colname="stimulus_name",
template="natural_movie_{}_shuffled",
tmp_colname="__movie_number__",
):
"""
"""
if not table[stim_colname].str.contains(SHUFFLED_MOVIE_RE).any():
return table
table = table.copy()
table[tmp_colname] = table[stim_colname].str.extract(natural_movie_re, expand=True)[
"number"
]
unique_numbers = [
item for item in table[tmp_colname].dropna(inplace=False).unique()
]
if len(unique_numbers) != 1:
raise ValueError(
f"unable to uniquely determine a movie number for this session. Candidates: {unique_numbers}"
)
movie_number = unique_numbers[0]
def renamer(row):
if not isinstance(row[stim_colname], str):
return row[stim_colname]
if not template_re.match(row[stim_colname]):
return row[stim_colname]
else:
return template.format(movie_number)
table[stim_colname] = table.apply(renamer, axis=1)
table.drop(columns=tmp_colname, inplace=True)
return table
[docs]def standardize_movie_numbers(
table,
movie_re=GENERIC_MOVIE_RE,
numeral_re=NUMERAL_RE,
digit_names=DIGIT_NAMES,
stim_colname="stimulus_name",
):
""" Natural movie stimuli in visual coding are numbered using words, like "natural_movie_two" rather than
"natural_movie_2". This function ensures that all of the natural movie stimuli in an experiment are named by
that convention.
Parameters
----------
table : pd.DataFrame
the incoming stimulus table
movie_re : re.Pattern, optional
regex that matches movie stimulus names
numeral_re : re.Pattern, optional
regex that extracts movie numbers from stimulus names
digit_names : dict, optional
map from numerals to english words
stim_colname : str, optional
the name of the dataframe column that contains stimulus names
Returns
-------
table : pd.DataFrame
the stimulus table with movie numerals having been mapped to english words
"""
replace = lambda match_obj: digit_names[match_obj["number"]]
# for some reason pandas really wants us to use the captures
warnings.filterwarnings("ignore", "This pattern has match groups")
movie_rows = table[stim_colname].str.contains(movie_re, na=False)
table.loc[movie_rows, stim_colname] = table.loc[
movie_rows, stim_colname
].str.replace(numeral_re, replace)
return table
[docs]def map_stimulus_names(table, name_map=None, stim_colname="stimulus_name"):
""" Applies a mappting to the stimulus names in a stimulus table
Parameters
----------
table : pd.DataFrame
the input stimulus table
name_map : dict, optional
rename the stimuli according to this mapping
stim_colname: str, optional
look in this column for stimulus names
"""
if name_map is None:
return table
if "" in name_map:
name_map[np.nan] = name_map[""]
table[stim_colname] = table[stim_colname].replace(
to_replace=name_map, inplace=False
)
return table
[docs]def map_column_names(table, name_map=None, ignore_case=True):
if ignore_case and name_map is not None:
name_map = {key.lower(): value for key, value in name_map.items()}
mapper = lambda name: name if name.lower() not in name_map else name_map[name.lower()]
else:
mapper = name_map
return table.rename(columns=mapper)