# Allen Institute Software License - This software license is the 2-clause BSD
# license plus a third clause that prohibits redistribution for commercial
# purposes without further permission.
#
# Copyright 2017. Allen Institute. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Redistributions for commercial purposes are not permitted without the
# Allen Institute's written permission.
# For purposes of this license, commercial purposes is the incorporation of the
# Allen Institute's software into anything for which you will charge fees or
# other compensation. Contact terms@alleninstitute.org for commercial licensing
# opportunities.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
import logging
import pandas as pd
from six import string_types
from allensdk.config.manifest import Manifest
import allensdk.brain_observatory.stimulus_info as stimulus_info
from .rma_template import RmaTemplate
from ..cache import cacheable, Cache
from .rma_pager import pageable
from dateutil.parser import parse as parse_date
[docs]class BrainObservatoryApi(RmaTemplate):
_log = logging.getLogger('allensdk.api.queries.brain_observatory_api')
NWB_FILE_TYPE = 'NWBOphys'
OPHYS_ANALYSIS_FILE_TYPE = 'OphysExperimentCellRoiMetricsFile'
OPHYS_EVENTS_FILE_TYPE = 'ObservatoryEventsFile'
CELL_MAPPING_ID = 590985414
rma_templates = \
{"brain_observatory_queries": [
{'name': 'list_isi_experiments',
'description': 'see name',
'model': 'IsiExperiment',
'num_rows': 'all',
'count': False,
'criteria_params': []
},
{'name': 'isi_experiment_by_ids',
'description': 'see name',
'model': 'IsiExperiment',
'criteria': '[id$in{{ isi_experiment_ids }}]',
'include': 'experiment_container(ophys_experiments,targeted_structure)',
'num_rows': 'all',
'count': False,
'criteria_params': ['isi_experiment_ids']
},
{'name': 'ophys_experiment_by_ids',
'description': 'see name',
'model': 'OphysExperiment',
'criteria': '{% if ophys_experiment_ids is defined %}[id$in{{ ophys_experiment_ids }}]{%endif%}',
'include': 'experiment_container,well_known_files(well_known_file_type),targeted_structure,specimen(donor(age,transgenic_lines))',
'num_rows': 'all',
'count': False,
'criteria_params': ['ophys_experiment_ids']
},
{'name': 'ophys_experiment_data',
'description': 'see name',
'model': 'WellKnownFile',
'criteria': '[attachable_id$eq{{ ophys_experiment_id }}],well_known_file_type[name$eq%s]' % NWB_FILE_TYPE,
'num_rows': 'all',
'count': False,
'criteria_params': ['ophys_experiment_id']
},
{'name': 'ophys_analysis_file',
'description': 'see name',
'model': 'WellKnownFile',
'criteria': '[attachable_id$eq{{ ophys_experiment_id }}],well_known_file_type[name$eq%s]' % OPHYS_ANALYSIS_FILE_TYPE,
'num_rows': 'all',
'count': False,
'criteria_params': ['ophys_experiment_id']
},
{'name': 'ophys_events_file',
'description': 'see name',
'model': 'WellKnownFile',
'criteria': '[attachable_id$eq{{ ophys_experiment_id }}],well_known_file_type[name$eq%s]' % OPHYS_EVENTS_FILE_TYPE,
'num_rows': 'all',
'count': False,
'criteria_params': ['ophys_experiment_id']
},
{'name': 'column_definitions',
'description': 'see name',
'model': 'ApiColumnDefinition',
'criteria': '[api_class_name$eq{{ api_class_name }}]',
'num_rows': 'all',
'count': False,
'criteria_params': ['api_class_name']
},
{'name': 'column_definition_class_names',
'description': 'see name',
'model': 'ApiColumnDefinition',
'only': ['api_class_name'],
'num_rows': 'all',
'count': False,
},
{'name': 'stimulus_mapping',
'description': 'see name',
'model': 'ApiCamStimulusMapping',
'criteria': '{% if stimulus_mapping_ids is defined %}[id$in{{ stimulus_mapping_ids }}]{%endif%}',
'num_rows': 'all',
'count': False,
'criteria_params': ['stimulus_mapping_ids']
},
{'name': 'experiment_container',
'description': 'see name',
'model': 'ExperimentContainer',
'criteria': '{% if experiment_container_ids is defined %}[id$in{{ experiment_container_ids }}]{%endif%}',
'include': 'ophys_experiments,isi_experiment,specimen(donor(conditions,age,transgenic_lines)),targeted_structure',
'num_rows': 'all',
'count': False,
'criteria_params': ['experiment_container_ids']
},
{'name': 'experiment_container_metric',
'description': 'see name',
'model': 'ApiCamExperimentContainerMetric',
'criteria': '{% if experiment_container_metric_ids is defined %}[id$in{{ experiment_container_metric_ids }}]{%endif%}',
'num_rows': 'all',
'count': False,
'criteria_params': ['experiment_container_metric_ids']
},
{'name': 'cell_metric',
'description': 'see name',
'model': 'ApiCamCellMetric',
'criteria': '{% if cell_specimen_ids is defined %}[cell_specimen_id$in{{ cell_specimen_ids }}]{%endif%}',
'criteria_params': ['cell_specimen_ids']
},
{'name': 'cell_specimen_id_mapping_table',
'description': 'see name',
'model': 'WellKnownFile',
'criteria': '[id$eq{{ mapping_table_id }}],well_known_file_type[name$eqOphysCellSpecimenIdMapping]',
'num_rows': 'all',
'count': False,
'criteria_params': ['mapping_table_id']
},
{'name': 'eye_gaze_mapping_file',
'description': 'h5 file containing mouse eye gaze mapped onto screen coordinates (as well as pupil and eye sizes)',
'model': 'WellKnownFile',
'criteria': '[attachable_id$eq{{ ophys_session_id }}],well_known_file_type[name$eqEyeDlcScreenMapping]',
'num_rows': 'all',
'count': False,
'criteria_params': ['ophys_session_id']
},
# NOTE: 'all_eye_mapping_files' query is for facilitating an ugly
# hack to get around lack of relationship between experiment id
# and session id in current warehouse. This should be removed when
# the relationship is added.
{'name': 'all_eye_mapping_files',
'description': 'Get a list of dictionaries for all eye mapping wkfs',
'model': 'WellKnownFile',
'criteria': 'well_known_file_type[name$eqEyeDlcScreenMapping]',
'num_rows': 'all',
'count': False
}
]}
_QUERY_TEMPLATES = {
"=": '({0} == {1})',
"<": '({0} < {1})',
">": '({0} > {1})',
"<=": '({0} <= {1})',
">=": '({0} >= {1})',
"between": '({0} >= {1}) and ({0} <= {2})',
"in": '({0} == {1})',
"is": '({0} == {1})'
}
def __init__(self, base_uri=None, datacube_uri=None):
super(BrainObservatoryApi, self).__init__(base_uri,
query_manifest=BrainObservatoryApi.rma_templates)
self.datacube_uri = datacube_uri
[docs] @cacheable()
def get_ophys_experiments(self, ophys_experiment_ids=None):
''' Get OPhys Experiments by id
Parameters
----------
ophys_experiment_ids : integer or list of integers, optional
only select specific experiments.
Returns
-------
dict : ophys experiment metadata
'''
data = self.template_query('brain_observatory_queries',
'ophys_experiment_by_ids',
ophys_experiment_ids=ophys_experiment_ids)
return data
[docs] def get_isi_experiments(self, isi_experiment_ids=None):
''' Get ISI Experiments by id
Parameters
----------
isi_experiment_ids : integer or list of integers, optional
only select specific experiments.
Returns
-------
dict : isi experiment metadata
'''
data = self.template_query('brain_observatory_queries',
'isi_experiment_by_ids',
isi_experiment_ids=isi_experiment_ids)
return data
[docs] def list_isi_experiments(self, isi_ids=None):
'''List ISI experiments available through the Allen Institute API
Parameters
----------
neuronal_model_ids : integer or list of integers, optional
only select specific isi experiments.
Returns
-------
dict : neuronal model metadata
'''
data = self.template_query('brain_observatory_queries',
'list_isi_experiments')
return data
[docs] def list_column_definition_class_names(self):
''' Get column definitions
Parameters
----------
Returns
-------
list : api class name strings
'''
data = self.template_query('brain_observatory_queries',
'column_definition_class_names')
names = list(set([n['api_class_name'] for n in data]))
return names
[docs] def get_column_definitions(self, api_class_name=None):
''' Get column definitions
Parameters
----------
api_class_names : string or list of strings, optional
only select specific column definition records.
Returns
-------
dict : column definition metadata
'''
data = self.template_query('brain_observatory_queries',
'column_definitions',
api_class_name=api_class_name)
return data
[docs] @cacheable()
def get_stimulus_mappings(self, stimulus_mapping_ids=None):
''' Get stimulus mappings by id
Parameters
----------
stimulus_mapping_ids : integer or list of integers, optional
only select specific stimulus mapping records.
Returns
-------
dict : stimulus mapping metadata
'''
data = self.template_query('brain_observatory_queries',
'stimulus_mapping',
stimulus_mapping_ids=stimulus_mapping_ids)
return data
[docs] @cacheable()
@pageable(num_rows=2000, total_rows='all')
def get_cell_metrics(self, cell_specimen_ids=None, *args, **kwargs):
''' Get cell metrics by id
Parameters
----------
cell_metrics_ids : integer or list of integers, optional
only select specific cell metric records.
Returns
-------
dict : cell metric metadata
'''
order = kwargs.pop('order', ['\'cell_specimen_id\''])
data = self.template_query('brain_observatory_queries',
'cell_metric',
cell_specimen_ids=cell_specimen_ids,
order=order,
*args,
**kwargs)
return data
[docs] @cacheable()
def get_experiment_containers(self, experiment_container_ids=None):
''' Get experiment container by id
Parameters
----------
experiment_container_ids : integer or list of integers, optional
only select specific experiment containers.
Returns
-------
dict : experiment container metadata
'''
data = self.template_query('brain_observatory_queries',
'experiment_container',
experiment_container_ids=experiment_container_ids)
return data
[docs] def get_experiment_container_metrics(self, experiment_container_metric_ids=None):
''' Get experiment container metrics by id
Parameters
----------
isi_experiment_ids : integer or list of integers, optional
only select specific experiments.
Returns
-------
dict : isi experiment metadata
'''
data = self.template_query('brain_observatory_queries',
'experiment_container_metric',
experiment_container_metric_ids=experiment_container_metric_ids)
return data
[docs] @cacheable(strategy='create',
pathfinder=Cache.pathfinder(file_name_position=2,
path_keyword='file_name'))
def save_ophys_experiment_data(self, ophys_experiment_id, file_name):
data = self.template_query('brain_observatory_queries',
'ophys_experiment_data',
ophys_experiment_id=ophys_experiment_id)
try:
file_url = data[0]['download_link']
except Exception as _:
raise Exception("ophys experiment %d has no data file" %
ophys_experiment_id)
self._log.warning(
"Downloading ophys_experiment %d NWB. This can take some time." % ophys_experiment_id)
self.retrieve_file_over_http(self.api_url + file_url, file_name)
[docs] @cacheable(strategy='create',
pathfinder=Cache.pathfinder(file_name_position=2,
path_keyword='file_name'))
def save_ophys_experiment_analysis_data(self, ophys_experiment_id, file_name):
data = self.template_query('brain_observatory_queries',
'ophys_analysis_file',
ophys_experiment_id=ophys_experiment_id)
try:
file_url = data[0]['download_link']
except Exception as _:
raise Exception("ophys experiment %d has no %s analysis file" %
(ophys_experiment_id, ))
self._log.warning(
"Downloading ophys_experiment %d analysis file. This can take some time." % (ophys_experiment_id, ))
self.retrieve_file_over_http(self.api_url + file_url, file_name)
[docs] @cacheable(strategy='create',
pathfinder=Cache.pathfinder(file_name_position=2,
path_keyword='file_name'))
def save_ophys_experiment_event_data(self, ophys_experiment_id, file_name):
data = self.template_query('brain_observatory_queries',
'ophys_events_file',
ophys_experiment_id=ophys_experiment_id)
try:
file_url = data[0]['download_link']
except Exception:
raise Exception("ophys experiment %d has no events file" %
ophys_experiment_id)
self._log.warning(
"Downloading ophys_experiment %d events file. This can take some time." % ophys_experiment_id)
self.retrieve_file_over_http(self.api_url + file_url, file_name)
[docs] @cacheable(strategy='create',
pathfinder=Cache.pathfinder(file_name_position=3,
path_keyword='file_name'))
def save_ophys_experiment_eye_gaze_data(self,
ophys_experiment_id: int,
ophys_session_id: int,
file_name: str):
data = self.template_query('brain_observatory_queries',
'eye_gaze_mapping_file',
ophys_session_id=ophys_session_id)
experiment_session_string = f"ophys_experiment '{ophys_experiment_id}' (session '{ophys_session_id}')"
try:
file_url = data[0]['download_link']
except Exception:
raise Exception(f"{experiment_session_string} has no eye gaze mapping file")
self._log.warning(
f"Downloading {experiment_session_string} gaze mapping file. This can take some time."
)
self.retrieve_file_over_http(self.api_url + file_url, file_name)
[docs] def filter_experiments_and_containers(self, objs,
ids=None,
targeted_structures=None,
imaging_depths=None,
cre_lines=None,
reporter_lines=None,
transgenic_lines=None,
include_failed=False):
if not include_failed:
objs = [o for o in objs if not o.get('failed', False)]
if ids is not None:
objs = [o for o in objs if o['id'] in ids]
if targeted_structures is not None:
objs = [o for o in objs if o[
'targeted_structure']['acronym'] in targeted_structures]
if imaging_depths is not None:
objs = [o for o in objs if o[
'imaging_depth'] in imaging_depths]
if cre_lines is not None:
tls = [ tl.lower() for tl in cre_lines ]
obj_tls = [ find_specimen_cre_line(o['specimen']) for o in objs ]
obj_tls = [ o.lower() if o else None for o in obj_tls ]
objs = [o for i,o in enumerate(objs) if obj_tls[i] in tls]
if reporter_lines is not None:
tls = [ tl.lower() for tl in reporter_lines ]
obj_tls = [ find_specimen_reporter_line(o['specimen']) for o in objs ]
obj_tls = [ o.lower() if o else None for o in obj_tls ]
objs = [o for i,o in enumerate(objs) if obj_tls[i] in tls]
if transgenic_lines is not None:
tls = set([ tl.lower() for tl in transgenic_lines ])
objs = [ o for o in objs
if len(tls & set([ tl.lower()
for tl in find_specimen_transgenic_lines(o['specimen']) ]) ) ]
return objs
[docs] def filter_experiment_containers(self, containers,
ids=None,
targeted_structures=None,
imaging_depths=None,
cre_lines=None,
reporter_lines=None,
transgenic_lines=None,
include_failed=False,
simple=False):
containers = self.filter_experiments_and_containers(containers,
ids=ids,
targeted_structures=targeted_structures,
imaging_depths=imaging_depths,
cre_lines=cre_lines,
reporter_lines=reporter_lines,
transgenic_lines=transgenic_lines,
include_failed=include_failed)
if simple:
containers = self.simplify_experiment_containers(containers)
return containers
[docs] def filter_ophys_experiments(self, experiments,
ids=None,
experiment_container_ids=None,
targeted_structures=None,
imaging_depths=None,
cre_lines=None,
reporter_lines=None,
transgenic_lines=None,
stimuli=None,
session_types=None,
include_failed=False,
require_eye_tracking=False,
simple=False):
experiments = self.filter_experiments_and_containers(experiments,
ids=ids,
targeted_structures=targeted_structures,
imaging_depths=imaging_depths,
cre_lines=cre_lines,
reporter_lines=reporter_lines,
transgenic_lines=transgenic_lines)
if require_eye_tracking:
experiments = [e for e in experiments
if e.get('fail_eye_tracking', None) is False]
if not include_failed:
experiments = [e for e in experiments
if not e.get('experiment_container',{}).get('failed', False)]
if experiment_container_ids is not None:
experiments = [e for e in experiments if e[
'experiment_container_id'] in experiment_container_ids]
if session_types is not None:
experiments = [e for e in experiments if e[
'stimulus_name'] in session_types]
if stimuli is not None:
experiments = [e for e in experiments
if len(set(stimuli) & set(stimulus_info.stimuli_in_session(e['stimulus_name']))) > 0]
if simple:
experiments = self.simplify_ophys_experiments(experiments)
return experiments
[docs] def filter_cell_specimens(self, cell_specimens,
ids=None,
experiment_container_ids=None,
include_failed=False,
filters=None):
"""
Filter a list of cell specimen records returned from the get_cell_metrics method according
some of their properties.
Parameters
----------
cell_specimens: list of dicts
List of records returned by the get_cell_metrics method.
ids: list of integers
Return only records for cells with cell specimen ids in this list
experiment_container_ids: list of integers
Return only records for cells that belong to experiment container ids in this list
include_failed: bool
Whether to include cells from failed experiment containers
filters: list of dicts
Custom query used to reproduce filter sets created in the Allen Brain Observatory
web application. The general form is a list of dictionaries each of which
describes a filtering operation based on a metric. For more information, see
dataframe_query.
"""
if not include_failed:
cell_specimens = [c for c in cell_specimens if not c.get(
'failed_experiment_container', False)]
if ids is not None:
cell_specimens = [c for c in cell_specimens if c[
'cell_specimen_id'] in ids]
if experiment_container_ids is not None:
cell_specimens = [c for c in cell_specimens if c[
'experiment_container_id'] in experiment_container_ids]
if filters is not None:
cell_specimens = self.dataframe_query(cell_specimens,
filters,
'cell_specimen_id')
return cell_specimens
[docs] def dataframe_query_string(self,
filters):
"""
Convert a list of cell metric filter dictionaries into a
Pandas query string.
"""
def _quote_string(v):
if isinstance(v, string_types):
return "'%s'" % (v)
else:
return str(v)
def _filter_clause(op, field, value):
if op == 'in':
query_args = [field, str(value)]
elif type(value) is list:
query_args = [field] + list(map(_quote_string, value))
else:
query_args = [field, str(value)]
cluster_string = self._QUERY_TEMPLATES[op].\
format(*query_args)
return cluster_string
query_string = ' & '.join(_filter_clause(f['op'],
f['field'],
f['value']) for f in filters)
return query_string
[docs] def dataframe_query(self,
data,
filters,
primary_key):
"""
Given a list of dictionary records and a list of filter dictionaries,
filter the records using Pandas and return the filtered set of records.
Parameters
----------
data: list of dicts
List of dictionaries
filters: list of dicts
Each dictionary describes a filtering operation on a field in the dictionary.
The general form is { 'field': <field>, 'op': <operation>, 'value': <filter_value(s)> }.
For example, you can apply a threshold on the "osi_dg" column with something like this:
{ 'field': 'osi_dg', 'op': '>', 'value': 1.0 }. See _QUERY_TEMPLATES for a full list
of operators.
"""
if len(filters) == 0:
return data
queries = self.dataframe_query_string(filters)
result_dataframe = pd.DataFrame(data)
result_dataframe = result_dataframe.query(queries)
result_keys = set(result_dataframe[primary_key])
result = [d for d in data
if d[primary_key]
in result_keys]
return result
[docs] def get_cell_specimen_id_mapping(self, file_name, mapping_table_id=None):
'''Download mapping table from old to new cell specimen IDs.
The mapping table is a CSV file that maps cell specimen ids
that have changed between processing runs of the Brain
Observatory pipeline.
Parameters
----------
file_name : string
Filename to save locally.
mapping_table_id : integer
ID of the mapping table file. Defaults to the most recent
mapping table.
Returns
-------
pandas.DataFrame
Mapping table as a DataFrame.
'''
if mapping_table_id is None:
mapping_table_id = self.CELL_MAPPING_ID
data = self.template_query('brain_observatory_queries',
'cell_specimen_id_mapping_table',
mapping_table_id=mapping_table_id)
try:
file_url = data[0]['download_link']
except Exception as _:
raise Exception("No OphysCellSpecimenIdMapping file found.")
self.retrieve_file_over_http(self.api_url + file_url, file_name)
return pd.read_csv(file_name)
[docs] def simplify_experiment_containers(self, containers):
return [{
'id': c['id'],
'imaging_depth': c['imaging_depth'],
'targeted_structure': c['targeted_structure']['acronym'],
'cre_line': find_specimen_cre_line(c['specimen']),
'reporter_line': find_specimen_reporter_line(c['specimen']),
'donor_name': c['specimen']['donor']['external_donor_name'],
'specimen_name': c['specimen']['name'],
'tags': find_container_tags(c),
'failed': c['failed']
} for c in containers]
[docs] def simplify_ophys_experiments(self, exps):
return [{
'id': e['id'],
'imaging_depth': e['imaging_depth'],
'targeted_structure': e['targeted_structure']['acronym'],
'cre_line': find_specimen_cre_line(e['specimen']),
'reporter_line': find_specimen_reporter_line(e['specimen']),
'acquisition_age_days': find_experiment_acquisition_age(e),
'experiment_container_id': e['experiment_container_id'],
'session_type': e['stimulus_name'],
'donor_name': e['specimen']['donor']['external_donor_name'],
'specimen_name': e['specimen']['name'],
'fail_eye_tracking': e.get('fail_eye_tracking', None)
} for e in exps]
[docs]def find_specimen_cre_line(specimen):
try:
return next(tl['name'] for tl in specimen['donor']['transgenic_lines']
if tl['transgenic_line_type_name'] == 'driver' and
'Cre' in tl['name'])
except StopIteration:
return None
[docs]def find_specimen_reporter_line(specimen):
try:
return next(tl['name'] for tl in specimen['donor']['transgenic_lines']
if tl['transgenic_line_type_name'] == 'reporter')
except StopIteration:
return None
[docs]def find_specimen_transgenic_lines(specimen):
return [ tl['name'] for tl in specimen['donor']['transgenic_lines'] ]
[docs]def find_experiment_acquisition_age(exp):
try:
return (parse_date(exp['date_of_acquisition']) - parse_date(exp['specimen']['donor']['date_of_birth'])).days
except KeyError as e:
return None