# Allen Institute Software License - This software license is the 2-clause BSD
# license plus a third clause that prohibits redistribution for commercial
# purposes without further permission.
#
# Copyright 2017. Allen Institute. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Redistributions for commercial purposes are not permitted without the
# Allen Institute's written permission.
# For purposes of this license, commercial purposes is the incorporation of the
# Allen Institute's software into anything for which you will charge fees or
# other compensation. Contact terms@alleninstitute.org for commercial licensing
# opportunities.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
import os
import six
import numpy as np
import pandas as pd
from pathlib import Path
from allensdk.api.cache import Cache, get_default_manifest_file
from allensdk.api.queries.brain_observatory_api import BrainObservatoryApi
from allensdk.config.manifest_builder import ManifestBuilder
from .brain_observatory_nwb_data_set import BrainObservatoryNwbDataSet
import allensdk.brain_observatory.stimulus_info as stim_info
from allensdk.brain_observatory.locally_sparse_noise import LocallySparseNoise
from allensdk.brain_observatory.natural_scenes import NaturalScenes
from allensdk.brain_observatory.natural_movie import NaturalMovie
from allensdk.brain_observatory.static_gratings import StaticGratings
from allensdk.brain_observatory.drifting_gratings import DriftingGratings
from allensdk.brain_observatory.nwb import (read_eye_gaze_mappings,
create_eye_gaze_mapping_dataframe)
# NOTE: This is a really ugly hack to get around the fact that warehouse does
# not have Ophys session ids associated with experiment ids.
from .ophys_experiment_session_id_mapping import ophys_experiment_session_id_map
ANALYSIS_CLASS_DICT = {stim_info.LOCALLY_SPARSE_NOISE: LocallySparseNoise,
stim_info.LOCALLY_SPARSE_NOISE_4DEG: LocallySparseNoise,
stim_info.LOCALLY_SPARSE_NOISE_8DEG: LocallySparseNoise,
stim_info.NATURAL_MOVIE_ONE:NaturalMovie,
stim_info.NATURAL_MOVIE_TWO:NaturalMovie,
stim_info.NATURAL_MOVIE_THREE:NaturalMovie,
stim_info.NATURAL_SCENES:NaturalScenes,
stim_info.STATIC_GRATINGS:StaticGratings,
stim_info.DRIFTING_GRATINGS:DriftingGratings}
[docs]class BrainObservatoryCache(Cache):
"""
Cache class for storing and accessing data from the Brain Observatory.
By default, this class will cache any downloaded metadata or files in
well known locations defined in a manifest file. This behavior can be
disabled.
Attributes
----------
api: BrainObservatoryApi instance
The object used for making API queries related to the Brain
Observatory.
Parameters
----------
cache: boolean
Whether the class should save results of API queries to locations specified
in the manifest file. Queries for files (as opposed to metadata) must have a
file location. If caching is disabled, those locations must be specified
in the function call (e.g. get_ophys_experiment_data(file_name='file.nwb')).
manifest_file: string
File name of the manifest to be read. Default is "brain_observatory_manifest.json".
"""
EXPERIMENT_CONTAINERS_KEY = 'EXPERIMENT_CONTAINERS'
EXPERIMENTS_KEY = 'EXPERIMENTS'
CELL_SPECIMENS_KEY = 'CELL_SPECIMENS'
EXPERIMENT_DATA_KEY = 'EXPERIMENT_DATA'
ANALYSIS_DATA_KEY = 'ANALYSIS_DATA'
EVENTS_DATA_KEY = 'EVENTS_DATA'
STIMULUS_MAPPINGS_KEY = 'STIMULUS_MAPPINGS'
EYE_GAZE_DATA_KEY = 'EYE_GAZE_DATA'
MANIFEST_VERSION = '1.3'
def __init__(self, cache=True, manifest_file=None, base_uri=None, api=None):
if manifest_file is None:
manifest_file = get_default_manifest_file('brain_observatory')
super(BrainObservatoryCache, self).__init__(
manifest=manifest_file, cache=cache, version=self.MANIFEST_VERSION)
if api is None:
self.api = BrainObservatoryApi(base_uri=base_uri)
else:
self.api = api
[docs] def get_all_targeted_structures(self):
""" Return a list of all targeted structures in the data set. """
containers = self.get_experiment_containers(simple=False)
targeted_structures = set(
[c['targeted_structure']['acronym'] for c in containers])
return sorted(list(targeted_structures))
[docs] def get_all_cre_lines(self):
""" Return a list of all cre driver lines in the data set. """
containers = self.get_experiment_containers(simple=True)
cre_lines = set([c['cre_line'] for c in containers])
return sorted(list(cre_lines))
[docs] def get_all_reporter_lines(self):
""" Return a list of all reporter lines in the data set. """
containers = self.get_experiment_containers(simple=True)
reporter_lines = set([c['reporter_line'] for c in containers])
return sorted(list(reporter_lines))
[docs] def get_all_imaging_depths(self):
""" Return a list of all imaging depths in the data set. """
containers = self.get_experiment_containers(simple=True)
imaging_depths = set([c['imaging_depth'] for c in containers])
return sorted(list(imaging_depths))
[docs] def get_all_session_types(self):
""" Return a list of all stimulus sessions in the data set. """
exps = self.get_ophys_experiments(simple=False)
names = set([exp['stimulus_name'] for exp in exps])
return sorted(list(names))
[docs] def get_all_stimuli(self):
""" Return a list of all stimuli in the data set. """
return sorted(list(stim_info.all_stimuli()))
[docs] def get_experiment_containers(self, file_name=None,
ids=None,
targeted_structures=None,
imaging_depths=None,
cre_lines=None,
reporter_lines=None,
transgenic_lines=None,
include_failed=False,
simple=True):
""" Get a list of experiment containers matching certain criteria.
Parameters
----------
file_name: string
File name to save/read the experiment containers. If file_name is None,
the file_name will be pulled out of the manifest. If caching
is disabled, no file will be saved. Default is None.
ids: list
List of experiment container ids.
targeted_structures: list
List of structure acronyms. Must be in the list returned by
BrainObservatoryCache.get_all_targeted_structures().
imaging_depths: list
List of imaging depths. Must be in the list returned by
BrainObservatoryCache.get_all_imaging_depths().
cre_lines: list
List of cre lines. Must be in the list returned by
BrainObservatoryCache.get_all_cre_lines().
reporter_lines: list
List of reporter lines. Must be in the list returned by
BrainObservatoryCache.get_all_reporter_lines().
transgenic_lines: list
List of transgenic lines. Must be in the list returned by
BrainObservatoryCache.get_all_cre_lines() or.
BrainObservatoryCache.get_all_reporter_lines().
include_failed: boolean
Whether or not to include failed experiment containers.
simple: boolean
Whether or not to simplify the dictionary properties returned by this method
to a more concise subset.
Returns
-------
list of dictionaries
"""
_assert_not_string(targeted_structures, "targeted_structures")
_assert_not_string(cre_lines, "cre_lines")
_assert_not_string(reporter_lines, "reporter_lines")
_assert_not_string(transgenic_lines, "transgenic_lines")
file_name = self.get_cache_path(
file_name, self.EXPERIMENT_CONTAINERS_KEY)
containers = self.api.get_experiment_containers(path=file_name,
strategy='lazy',
**Cache.cache_json())
containers = self.api.filter_experiment_containers(containers, ids=ids,
targeted_structures=targeted_structures,
imaging_depths=imaging_depths,
cre_lines=cre_lines,
reporter_lines=reporter_lines,
transgenic_lines=transgenic_lines,
include_failed=include_failed,
simple=simple)
return containers
[docs] def get_ophys_experiment_stimuli(self, experiment_id):
""" For a single experiment, return the list of stimuli present in that experiment. """
exps = self.get_ophys_experiments(ids=[experiment_id])
if len(exps) == 0:
return None
return stim_info.stimuli_in_session(exps[0]['session_type'])
[docs] def get_ophys_experiments(self, file_name=None,
ids=None,
experiment_container_ids=None,
targeted_structures=None,
imaging_depths=None,
cre_lines=None,
reporter_lines=None,
transgenic_lines=None,
stimuli=None,
session_types=None,
cell_specimen_ids=None,
include_failed=False,
require_eye_tracking=False,
simple=True):
""" Get a list of ophys experiments matching certain criteria.
Parameters
----------
file_name: string
File name to save/read the ophys experiments. If file_name is None,
the file_name will be pulled out of the manifest. If caching
is disabled, no file will be saved. Default is None.
ids: list
List of ophys experiment ids.
experiment_container_ids: list
List of experiment container ids.
targeted_structures: list
List of structure acronyms. Must be in the list returned by
BrainObservatoryCache.get_all_targeted_structures().
imaging_depths: list
List of imaging depths. Must be in the list returned by
BrainObservatoryCache.get_all_imaging_depths().
cre_lines: list
List of cre lines. Must be in the list returned by
BrainObservatoryCache.get_all_cre_lines().
reporter_lines: list
List of reporter lines. Must be in the list returned by
BrainObservatoryCache.get_all_reporter_lines().
transgenic_lines: list
List of transgenic lines. Must be in the list returned by
BrainObservatoryCache.get_all_cre_lines() or.
BrainObservatoryCache.get_all_reporter_lines().
stimuli: list
List of stimulus names. Must be in the list returned by
BrainObservatoryCache.get_all_stimuli().
session_types: list
List of stimulus session type names. Must be in the list returned by
BrainObservatoryCache.get_all_session_types().
cell_specimen_ids: list
Only include experiments that contain cells with these ids.
include_failed: boolean
Whether or not to include experiments from failed experiment containers.
simple: boolean
Whether or not to simplify the dictionary properties returned by this method
to a more concise subset.
require_eye_tracking: boolean
If True, only return experiments that have eye tracking results. Default: False.
Returns
-------
list of dictionaries
"""
_assert_not_string(targeted_structures, "targeted_structures")
_assert_not_string(cre_lines, "cre_lines")
_assert_not_string(reporter_lines, "reporter_lines")
_assert_not_string(transgenic_lines, "transgenic_lines")
_assert_not_string(stimuli, "stimuli")
_assert_not_string(session_types, "session_types")
file_name = self.get_cache_path(file_name, self.EXPERIMENTS_KEY)
exps = self.api.get_ophys_experiments(path=file_name,
strategy='lazy',
**Cache.cache_json())
# NOTE: Ugly hack to update the 'fail_eye_tracking' field
# which is using True/False values for the previous eye mapping
# implementation. This will also need to be fixed in warehouse.
# ----- Start of ugly hack -----
response = self.api.template_query('brain_observatory_queries',
'all_eye_mapping_files')
session_ids_with_eye_tracking: set = {entry['attachable_id']
for entry in response
if entry['attachable_type'] == "OphysSession"}
for indx, exp in enumerate(exps):
try:
ophys_session_id = ophys_experiment_session_id_map[exp['id']]
if ophys_session_id in session_ids_with_eye_tracking:
exps[indx]['fail_eye_tracking'] = False
else:
exps[indx]['fail_eye_tracking'] = True
except KeyError:
exps[indx]['fail_eye_tracking'] = True
# ----- End of ugly hack -----
if cell_specimen_ids is not None:
cells = self.get_cell_specimens(ids=cell_specimen_ids)
cell_container_ids = set([cell['experiment_container_id'] for cell in cells])
if experiment_container_ids is not None:
experiment_container_ids = list(set(experiment_container_ids) - cell_container_ids)
else:
experiment_container_ids = list(cell_container_ids)
exps = self.api.filter_ophys_experiments(exps,
ids=ids,
experiment_container_ids=experiment_container_ids,
targeted_structures=targeted_structures,
imaging_depths=imaging_depths,
cre_lines=cre_lines,
reporter_lines=reporter_lines,
transgenic_lines=transgenic_lines,
stimuli=stimuli,
session_types=session_types,
include_failed=include_failed,
require_eye_tracking=require_eye_tracking,
simple=simple)
return exps
def _get_stimulus_mappings(self, file_name=None):
""" Returns a mapping of which metrics are related to which stimuli. Internal use only. """
file_name = self.get_cache_path(file_name, self.STIMULUS_MAPPINGS_KEY)
mappings = self.api.get_stimulus_mappings(path=file_name,
strategy='lazy',
**Cache.cache_json())
return mappings
[docs] def get_cell_specimens(self,
file_name=None,
ids=None,
experiment_container_ids=None,
include_failed=False,
simple=True,
filters=None):
""" Return cell specimens that have certain properies.
Parameters
----------
file_name: string
File name to save/read the cell specimens. If file_name is None,
the file_name will be pulled out of the manifest. If caching
is disabled, no file will be saved. Default is None.
ids: list
List of cell specimen ids.
experiment_container_ids: list
List of experiment container ids.
include_failed: bool
Whether to include cells from failed experiment containers
simple: boolean
Whether or not to simplify the dictionary properties returned by this method
to a more concise subset.
filters: list of dicts
List of filter dictionaries. The Allen Brain Observatory web site can
generate filters in this format to reproduce a filtered set of cells
found there. To see what these look like, visit
http://observatory.brain-map.org/visualcoding, perform a cell search
and apply some filters (e.g. find cells in a particular area), then
click the "view these cells in the AllenSDK" link on the bottom-left
of the search results page. This will take you to a page that contains
a code sample you can use to apply those same filters via this argument.
For more detail on the filter syntax, see BrainObservatoryApi.dataframe_query.
Returns
-------
list of dictionaries
"""
file_name = self.get_cache_path(file_name, self.CELL_SPECIMENS_KEY)
cell_specimens = self.api.get_cell_metrics(path=file_name,
strategy='lazy',
pre= lambda x: [y for y in x],
**Cache.cache_json())
cell_specimens = self.api.filter_cell_specimens(cell_specimens,
ids=ids,
experiment_container_ids=experiment_container_ids,
include_failed=include_failed,
filters=filters)
# drop the thumbnail columns
if simple:
mappings = self._get_stimulus_mappings()
thumbnails = [m['item'] for m in mappings if m[
'item_type'] == 'T' and m['level'] == 'R']
for cs in cell_specimens:
for t in thumbnails:
del cs[t]
return cell_specimens
[docs] def get_nwb_filepath(self, ophys_experiment_id=None):
cache_nwb_filepath = self.get_cache_path(None, self.EXPERIMENT_DATA_KEY, ophys_experiment_id)
if os.path.exists(cache_nwb_filepath):
return cache_nwb_filepath
else:
return None
[docs] def get_ophys_experiment_data(self, ophys_experiment_id, file_name=None):
""" Download the NWB file for an ophys_experiment (if it hasn't already been
downloaded) and return a data accessor object.
Parameters
----------
file_name: string
File name to save/read the data set. If file_name is None,
the file_name will be pulled out of the manifest. If caching
is disabled, no file will be saved. Default is None.
ophys_experiment_id: integer
id of the ophys_experiment to retrieve
Returns
-------
BrainObservatoryNwbDataSet
"""
file_name = self.get_cache_path(
file_name, self.EXPERIMENT_DATA_KEY, ophys_experiment_id)
self.api.save_ophys_experiment_data(ophys_experiment_id, file_name, strategy='lazy')
return BrainObservatoryNwbDataSet(file_name)
[docs] def get_ophys_experiment_analysis(self, ophys_experiment_id, stimulus_type, file_name=None):
""" Download the h5 analysis file for a stimulus set, for a particular ophys_experiment
(if it hasn't already been downloaded) and return a data accessor object.
Parameters
----------
file_name: string
File name to save/read the data set. If file_name is None,
the file_name will be pulled out of the manifest. If caching
is disabled, no file will be saved. Default is None.
ophys_experiment_id: int
id of the ophys_experiment to retrieve
stimulus_name: str
stimulus type; should be an element of self.list_stimuli()
Returns
-------
BrainObservatoryNwbDataSet
"""
data_set = self.get_ophys_experiment_data(ophys_experiment_id, file_name=None)
session_type = data_set.get_session_type()
if not stimulus_type in stim_info.SESSION_STIMULUS_MAP[session_type]:
raise RuntimeError('Stimulus %s not available session type: %s' % (stimulus_type, stim_info.SESSION_STIMULUS_MAP[stimulus_type]))
# Use manifest to figure out where to cache the file:
file_name = self.get_cache_path(file_name, self.ANALYSIS_DATA_KEY, ophys_experiment_id, session_type)
# Cache the analsis file from an RMA query:
self.api.save_ophys_experiment_analysis_data(ophys_experiment_id, file_name, strategy='lazy')
# Get the analysis class from ANALYSIS_CLASS_DICT, and build from the static method:
if stimulus_type in stim_info.LOCALLY_SPARSE_NOISE_STIMULUS_TYPES+stim_info.NATURAL_MOVIE_STIMULUS_TYPES:
return ANALYSIS_CLASS_DICT[stimulus_type].from_analysis_file(data_set, file_name, stimulus_type)
else:
return ANALYSIS_CLASS_DICT[stimulus_type].from_analysis_file(data_set, file_name)
[docs] def get_ophys_experiment_events(self, ophys_experiment_id, file_name=None):
""" Download the npz events file for an ophys_experiment if it hasn't
already been downloaded and return the events array.
Parameters
----------
file_name: string
File name to save/read the data set. If file_name is None,
the file_name will be pulled out of the manifest. If caching
is disabled, no file will be saved. Default is None.
ophys_experiment_id: int
id of the ophys_experiment to retrieve events for
Returns
-------
events: numpy.ndarray
[N_cells,N_times] array of events.
"""
file_name = self.get_cache_path(
file_name, self.EVENTS_DATA_KEY, ophys_experiment_id)
self.api.save_ophys_experiment_event_data(ophys_experiment_id, file_name, strategy='lazy')
return np.load(file_name, allow_pickle=False)["ev"]
[docs] def get_ophys_pupil_data(self,
ophys_experiment_id: int,
file_name: str = None,
suppress_pupil_data: bool = True) -> pd.DataFrame:
"""Download the h5 eye gaze mapping file for an ophys_experiment if
it hasn't already been downloaded and return it as a pandas.DataFrame.
Parameters
----------
file_name: string
File name to save/read the data set. If file_name is None,
the file_name will be pulled out of the manifest. If caching
is disabled, no file will be saved. Default is None.
ophys_experiment_id: int
id of the ophys_experiment to retrieve pupil data for.
suppress_pupil_data: bool
Whether or not to suppress pupil data from dataset.
Default is True.
Returns
-------
pd.DataFrame
If 'suppress_eye_gaze_data' is set to 'False':
Contains raw/filtered columns for gaze mapping:
*_eye_area
*_pupil_area
*_screen_coordinates_x_cm
*_screen_coordinates_y_cm
*_screen_coordinates_spherical_x_deg
*_screen_coorindates_spherical_y_deg
Otherwise:
An empty pandas DataFrame
"""
if suppress_pupil_data:
print("This pupil data is obtained using a new eye "
"tracking algorithm and is in the process of being validated. "
"If you would like to view the data anyways, "
"please set the 'suppress_pupil_data' parameter to 'False'.")
return pd.DataFrame()
# NOTE: This is a really ugly hack to get around the fact that warehouse does
# not have Ophys session ids associated with experiment ids. This should be
# removed when warehouse session ids have associations with experiment ids.
# ----- Start of ugly hack -----
try:
ophys_session_id = ophys_experiment_session_id_map[ophys_experiment_id]
except KeyError:
raise RuntimeError(f"Experiment id '{ophys_experiment_id}' has no associated session!")
# ----- End of ugly hack -----
file_name = self.get_cache_path(file_name,
self.EYE_GAZE_DATA_KEY,
ophys_session_id)
if not file_name:
raise RuntimeError("Could not obtain a file_name for pupil data "
f"with experiment id: {ophys_experiment_id} "
f"(session id: {ophys_session_id})")
# NOTE: `save_ophys_experiment_eye_gaze_data` will also need to be
# updated to remove ophy_session_id param when ugly hack is removed.
self.api.save_ophys_experiment_eye_gaze_data(ophys_experiment_id,
ophys_session_id,
file_name,
strategy='lazy')
gaze_mapping_data = read_eye_gaze_mappings(Path(file_name))
return create_eye_gaze_mapping_dataframe(gaze_mapping_data)
[docs] def build_manifest(self, file_name):
"""
Construct a manifest for this Cache class and save it in a file.
Parameters
----------
file_name: string
File location to save the manifest.
"""
mb = ManifestBuilder()
mb.set_version(self.MANIFEST_VERSION)
mb.add_path('BASEDIR', '.')
mb.add_path(self.EXPERIMENT_CONTAINERS_KEY,
'experiment_containers.json', typename='file', parent_key='BASEDIR')
mb.add_path(self.EXPERIMENTS_KEY, 'ophys_experiments.json',
typename='file', parent_key='BASEDIR')
mb.add_path(self.EXPERIMENT_DATA_KEY, 'ophys_experiment_data/%d.nwb',
typename='file', parent_key='BASEDIR')
mb.add_path(self.ANALYSIS_DATA_KEY, 'ophys_experiment_analysis/%d_%s_analysis.h5',
typename='file', parent_key='BASEDIR')
mb.add_path(self.EVENTS_DATA_KEY, 'ophys_experiment_events/%d_events.npz',
typename='file', parent_key='BASEDIR')
mb.add_path(self.CELL_SPECIMENS_KEY, 'cell_specimens.json',
typename='file', parent_key='BASEDIR')
mb.add_path(self.STIMULUS_MAPPINGS_KEY, 'stimulus_mappings.json',
typename='file', parent_key='BASEDIR')
mb.add_path(self.EYE_GAZE_DATA_KEY, 'ophys_eye_gaze_mapping/%d_eyetracking_dlc_to_screen_mapping.h5',
typename='file', parent_key='BASEDIR')
mb.write_json_file(file_name)
def _assert_not_string(arg, name):
if isinstance(arg, six.string_types):
raise TypeError(
"Argument '%s' with value '%s' is a string type, but should be a list." % (name, arg))