# Allen Institute Software License - This software license is the 2-clause BSD
# license plus a third clause that prohibits redistribution for commercial
# purposes without further permission.
#
# Copyright 2015-2017. Allen Institute. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Redistributions for commercial purposes are not permitted without the
# Allen Institute's written permission.
# For purposes of this license, commercial purposes is the incorporation of the
# Allen Institute's software into anything for which you will charge fees or
# other compensation. Contact terms@alleninstitute.org for commercial licensing
# opportunities.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
import functools
import operator as op
import os
import re
import warnings
import nrrd
import numpy as np
import pandas as pd
import SimpleITK as sitk
from allensdk.api.queries.mouse_connectivity_api import MouseConnectivityApi
from allensdk.api.warehouse_cache.cache import Cache, get_default_manifest_file
from allensdk.config.manifest import Manifest
from six.moves import reduce
from .reference_space_cache import ReferenceSpaceCache
[docs]class MouseConnectivityCache(ReferenceSpaceCache):
"""
Cache class for storing and accessing data related to the adult mouse
Connectivity Atlas. By default, this class will cache any downloaded
metadata or files in well known locations defined in a manifest file.
This behavior can be disabled.
Attributes
----------
resolution: int
Resolution of grid data to be downloaded when accessing projection
volume, the annotation volume, and the annotation volume. Must be
one of (10, 25, 50, 100). Default is 25.
api: MouseConnectivityApi instance
Used internally to make API queries.
Parameters
----------
resolution: int
Resolution of grid data to be downloaded when accessing projection
volume, the annotation volume, and the annotation volume. Must be
one of (10, 25, 50, 100). Default is 25.
ccf_version: string
Desired version of the Common Coordinate Framework. This affects
the annotation volume (get_annotation_volume) and structure masks
(get_structure_mask). Must be one of (MouseConnectivityApi.CCF_2015,
MouseConnectivityApi.CCF_2016).
Default: MouseConnectivityApi.CCF_2016
cache: boolean
Whether the class should save results of API queries to locations
specified in the manifest file. Queries for files (as opposed to
metadata) must have a file location. If caching is disabled, those
locations must be specified in the function call (e.g.
get_projection_density(file_name='file.nrrd')).
manifest_file: string
File name of the manifest to be read. Default is
"mouse_connectivity_manifest.json".
"""
PROJECTION_DENSITY_KEY = "PROJECTION_DENSITY"
INJECTION_DENSITY_KEY = "INJECTION_DENSITY"
INJECTION_FRACTION_KEY = "INJECTION_FRACTION"
DATA_MASK_KEY = "DATA_MASK"
STRUCTURE_UNIONIZES_KEY = "STRUCTURE_UNIONIZES"
EXPERIMENTS_KEY = "EXPERIMENTS"
DEFORMATION_FIELD_HEADER_KEY = "DEFORMATION_FIELD_HEADER"
DEFORMATION_FIELD_VOXEL_KEY = "DEFORMATION_FIELD_VOXELS"
ALIGNMENT3D_KEY = "ALIGNMENT3D"
MANIFEST_VERSION = 1.3
SUMMARY_STRUCTURE_SET_ID = 167587189
DEFAULT_STRUCTURE_SET_IDS = tuple([SUMMARY_STRUCTURE_SET_ID])
DFMFLD_RESOLUTIONS = (25,)
@property
def default_structure_ids(self):
if not hasattr(self, "_default_structure_ids"):
tree = self.get_structure_tree()
default_structures = tree.get_structures_by_set_id(
MouseConnectivityCache.DEFAULT_STRUCTURE_SET_IDS
)
self._default_structure_ids = [
st["id"] for st in default_structures
]
return self._default_structure_ids
def __init__(
self,
resolution=None,
cache=True,
manifest_file=None,
ccf_version=None,
base_uri=None,
version=None,
):
if manifest_file is None:
manifest_file = get_default_manifest_file("mouse_connectivity")
if version is None:
version = self.MANIFEST_VERSION
if resolution is None:
resolution = MouseConnectivityApi.VOXEL_RESOLUTION_25_MICRONS
if ccf_version is None:
ccf_version = MouseConnectivityApi.CCF_VERSION_DEFAULT
super(MouseConnectivityCache, self).__init__(
resolution,
reference_space_key=ccf_version,
cache=cache,
manifest=manifest_file,
version=version,
)
self.api = MouseConnectivityApi(base_uri=base_uri)
[docs] def get_projection_density(self, experiment_id, file_name=None):
"""
Read a projection density volume for a single experiment. Download it
first if it doesn't exist. Projection density is the proportion of
of projecting pixels in a grid voxel in [0,1].
Parameters
----------
experiment_id: int
ID of the experiment to download/read. This corresponds to
section_data_set_id in the API.
file_name: string
File name to store the template volume. If it already exists,
it will be read from this file. If file_name is None, the
file_name will be pulled out of the manifest. Default is None.
"""
file_name = self.get_cache_path(
file_name,
self.PROJECTION_DENSITY_KEY,
experiment_id,
self.resolution,
)
self.api.download_projection_density(
file_name, experiment_id, self.resolution, strategy="lazy"
)
return nrrd.read(file_name)
[docs] def get_injection_density(self, experiment_id, file_name=None):
"""
Read an injection density volume for a single experiment. Download it
first if it doesn't exist. Injection density is the proportion of
projecting pixels in a grid voxel only including pixels that are
part of the injection site in [0,1].
Parameters
----------
experiment_id: int
ID of the experiment to download/read. This corresponds to
section_data_set_id in the API.
file_name: string
File name to store the template volume. If it already exists,
it will be read from this file. If file_name is None, the
file_name will be pulled out of the manifest. Default is None.
"""
file_name = self.get_cache_path(
file_name,
self.INJECTION_DENSITY_KEY,
experiment_id,
self.resolution,
)
self.api.download_injection_density(
file_name, experiment_id, self.resolution, strategy="lazy"
)
return nrrd.read(file_name)
[docs] def get_injection_fraction(self, experiment_id, file_name=None):
"""
Read an injection fraction volume for a single experiment. Download it
first if it doesn't exist. Injection fraction is the proportion of
pixels in the injection site in a grid voxel in [0,1].
Parameters
----------
experiment_id: int
ID of the experiment to download/read. This corresponds to
section_data_set_id in the API.
file_name: string
File name to store the template volume. If it already exists,
it will be read from this file. If file_name is None, the
file_name will be pulled out of the manifest. Default is None.
"""
file_name = self.get_cache_path(
file_name,
self.INJECTION_FRACTION_KEY,
experiment_id,
self.resolution,
)
self.api.download_injection_fraction(
file_name, experiment_id, self.resolution, strategy="lazy"
)
return nrrd.read(file_name)
[docs] def get_data_mask(self, experiment_id, file_name=None):
"""
Read a data mask volume for a single experiment. Download it
first if it doesn't exist. Data mask is a binary mask of
voxels that have valid data. Only use valid data in analysis!
Parameters
----------
experiment_id: int
ID of the experiment to download/read. This corresponds to
section_data_set_id in the API.
file_name: string
File name to store the template volume. If it already exists,
it will be read from this file. If file_name is None, the
file_name will be pulled out of the manifest. Default is None.
"""
file_name = self.get_cache_path(
file_name, self.DATA_MASK_KEY, experiment_id, self.resolution
)
self.api.download_data_mask(
file_name, experiment_id, self.resolution, strategy="lazy"
)
return nrrd.read(file_name)
[docs] def get_experiments(
self,
dataframe=False,
file_name=None,
cre=None,
injection_structure_ids=None,
):
"""
Read a list of experiments that match certain criteria. If caching is
enabled,
this will save the whole (unfiltered) list of experiments to a file.
Parameters
----------
dataframe: boolean
Return the list of experiments as a Pandas DataFrame. If False,
return a list of dictionaries. Default False.
file_name: string
File name to save/read the structures table. If file_name is None,
the file_name will be pulled out of the manifest. If caching
is disabled, no file will be saved. Default is None.
cre: boolean or list
If True, return only cre-positive experiments. If False, return
only
cre-negative experiments. If None, return all experients. If list,
return
all experiments with cre line names in the supplied list. Default
None.
injection_structure_ids: list
Only return experiments that were injected in the structures
provided here.
If None, return all experiments. Default None.
"""
file_name = self.get_cache_path(file_name, self.EXPERIMENTS_KEY)
experiments = self.api.get_experiments_api(
path=file_name, strategy="lazy", **Cache.cache_json()
)
for e in experiments:
# renaming id
e["id"] = e["data_set_id"]
del e["data_set_id"]
# simplify trangsenic line
tl = e.get("transgenic_line", None)
if tl:
e["transgenic_line"] = tl["name"]
# parse the injection structures
injs = [int(i) for i in e["injection_structures"].split("/")]
e["injection_structures"] = injs
e["primary_injection_structure"] = e["structure_id"]
# remove storage dir
del e["storage_directory"]
# filter the read/downloaded list of experiments
experiments = self.filter_experiments(
experiments, cre, injection_structure_ids
)
if dataframe:
experiments = pd.DataFrame(experiments)
experiments.set_index(["id"], inplace=True, drop=False)
return experiments
[docs] def filter_experiments(
self, experiments, cre=None, injection_structure_ids=None
):
"""
Take a list of experiments and filter them by cre status and injection
structure.
Parameters
----------
cre: boolean or list
If True, return only cre-positive experiments. If False, return
only
cre-negative experiments. If None, return all experients. If list,
return
all experiments with cre line names in the supplied list. Default
None.
injection_structure_ids: list
Only return experiments that were injected in the structures
provided here.
If None, return all experiments. Default None.
"""
if cre is True:
experiments = [e for e in experiments if e["transgenic_line"]]
elif cre is False:
experiments = [e for e in experiments if not e["transgenic_line"]]
elif cre is not None:
cre = [c.lower() for c in cre]
experiments = [
e
for e in experiments
if e["transgenic_line"] is not None
and e["transgenic_line"].lower() in cre
]
if injection_structure_ids is not None:
descendant_ids = set(
reduce(
op.add,
self.get_structure_tree().descendant_ids(
injection_structure_ids
),
)
)
experiments = [
e for e in experiments if e["structure_id"] in descendant_ids
]
return experiments
[docs] def get_experiment_structure_unionizes(
self,
experiment_id,
file_name=None,
is_injection=None,
structure_ids=None,
include_descendants=False,
hemisphere_ids=None,
):
"""
Retrieve the structure unionize data for a specific experiment. Filter
by
structure, injection status, and hemisphere.
Parameters
----------
experiment_id: int
ID of the experiment of interest. Corresponds to
section_data_set_id in the API.
file_name: string
File name to save/read the experiments list. If file_name is None,
the file_name will be pulled out of the manifest. If caching
is disabled, no file will be saved. Default is None.
is_injection: boolean
If True, only return unionize records that disregard non-injection
pixels.
If False, only return unionize records that disregard injection
pixels.
If None, return all records. Default None.
structure_ids: list
Only return unionize records for a specific set of structures.
If None, return all records. Default None.
include_descendants: boolean
Include all descendant records for specified structures. Default
False.
hemisphere_ids: list
Only return unionize records that disregard pixels outside of a
hemisphere.
or set of hemispheres. Left = 1, Right = 2, Both = 3. If None,
include all
records [1, 2, 3]. Default None.
"""
file_name = self.get_cache_path(
file_name, self.STRUCTURE_UNIONIZES_KEY, experiment_id
)
filter_fn = functools.partial(
self.filter_structure_unionizes,
is_injection=is_injection,
structure_ids=structure_ids,
include_descendants=include_descendants,
hemisphere_ids=hemisphere_ids,
)
def col_rn(x):
return pd.DataFrame(x).rename(
columns={"section_data_set_id": "experiment_id"}
)
return self.api.get_structure_unionizes(
[experiment_id],
path=file_name,
strategy="lazy",
pre=col_rn,
post=filter_fn,
writer=lambda p, x: pd.DataFrame(x).to_csv(p),
reader=lambda x: pd.read_csv(x, index_col=0, parse_dates=True),
)
[docs] def rank_structures(
self,
experiment_ids,
is_injection,
structure_ids=None,
hemisphere_ids=None,
rank_on="normalized_projection_volume",
n=5,
threshold=10**-2,
):
"""Produces one or more (per experiment) ranked lists of
brain structures, using a specified data field.
Parameters
----------
experiment_ids : list of int
Obtain injection_structures for these experiments.
is_injection : boolean
Use data from only injection (or non-injection) unionizes.
structure_ids : list of int, optional
Consider only these structures. It is a good idea to make sure that
these structures are not spatially
overlapping; otherwise your results will contain redundant
information. Defaults to the summary
structures - a brain-wide list of nonoverlapping mid-level
structures.
hemisphere_ids : list of int, optional
Consider only these hemispheres (1: left, 2: right, 3: both). Like
with structures,
you might get redundant results if you select overlapping options.
Defaults to [1, 2].
rank_on : str, optional
Rank unionize data using this field (descending). Defaults to
normalized_projection_volume.
n : int, optional
Return only the top n structures.
threshold : float, optional
Consider only records whose data value - specified by the rank_on
parameter - exceeds this value.
Returns
-------
list :
Each element (1 for each input experiment) is a list of
dictionaries. The dictionaries describe the top
injection structures in descending order. They are specified by
their structure and hemisphere id fields and
additionally report the value specified by the rank_on parameter.
"""
output_keys = [
"experiment_id",
rank_on,
"hemisphere_id",
"structure_id",
]
if hemisphere_ids is None:
hemisphere_ids = [1, 2]
if structure_ids is None:
structure_ids = self.default_structure_ids
unionizes = self.get_structure_unionizes(
experiment_ids,
is_injection=is_injection,
structure_ids=structure_ids,
hemisphere_ids=hemisphere_ids,
include_descendants=False,
)
unionizes = unionizes[unionizes[rank_on] > threshold]
results = []
for eid in experiment_ids:
this_experiment_unionizes = unionizes[
unionizes["experiment_id"] == eid
]
this_experiment_unionizes = this_experiment_unionizes.sort_values(
by=rank_on, ascending=False
)
this_experiment_unionizes = this_experiment_unionizes.loc[
:, output_keys
]
this_experiment_unionizes = unionizes[
unionizes["experiment_id"] == eid
]
this_experiment_unionizes = this_experiment_unionizes.sort_values(
by=rank_on, ascending=False
)
this_experiment_unionizes = this_experiment_unionizes.loc[
:, output_keys
]
records = this_experiment_unionizes.to_dict("records")
if len(records) > n:
records = records[:n]
results.append(records)
return results
[docs] def filter_structure_unionizes(
self,
unionizes,
is_injection=None,
structure_ids=None,
include_descendants=False,
hemisphere_ids=None,
):
"""
Take a list of unionzes and return a subset of records filtered by
injection status, structure, and
hemisphere.
Parameters
----------
is_injection: boolean
If True, only return unionize records that disregard non-injection
pixels.
If False, only return unionize records that disregard injection
pixels.
If None, return all records. Default None.
structure_ids: list
Only return unionize records for a set of structures.
If None, return all records. Default None.
include_descendants: boolean
Include all descendant records for specified structures. Default
False.
hemisphere_ids: list
Only return unionize records that disregard pixels outside of a
hemisphere.
or set of hemispheres. Left = 1, Right = 2, Both = 3. If None,
include all
records [1, 2, 3]. Default None.
"""
if is_injection is not None:
unionizes = unionizes[unionizes.is_injection == is_injection]
if structure_ids is not None:
structure_ids = MouseConnectivityCache.validate_structure_ids(
structure_ids
)
if include_descendants:
structure_ids = reduce(
op.add,
self.get_structure_tree().descendant_ids(structure_ids),
)
else:
structure_ids = set(structure_ids)
unionizes = unionizes[
unionizes["structure_id"].isin(structure_ids)
]
if hemisphere_ids is not None:
unionizes = unionizes[
unionizes["hemisphere_id"].isin(hemisphere_ids)
]
return unionizes
[docs] def get_structure_unionizes(
self,
experiment_ids,
is_injection=None,
structure_ids=None,
include_descendants=False,
hemisphere_ids=None,
):
"""
Get structure unionizes for a set of experiment IDs. Filter the
results by injection status,
structure, and hemisphere.
Parameters
----------
experiment_ids: list
List of experiment IDs. Corresponds to section_data_set_id in the
API.
is_injection: boolean
If True, only return unionize records that disregard non-injection
pixels.
If False, only return unionize records that disregard injection
pixels.
If None, return all records. Default None.
structure_ids: list
Only return unionize records for a specific set of structures.
If None, return all records. Default None.
include_descendants: boolean
Include all descendant records for specified structures. Default
False.
hemisphere_ids: list
Only return unionize records that disregard pixels outside of a
hemisphere.
or set of hemispheres. Left = 1, Right = 2, Both = 3. If None,
include all
records [1, 2, 3]. Default None.
"""
unionizes = [
self.get_experiment_structure_unionizes(
eid,
is_injection=is_injection,
structure_ids=structure_ids,
include_descendants=include_descendants,
hemisphere_ids=hemisphere_ids,
)
for eid in experiment_ids
]
return pd.concat(unionizes, ignore_index=True, sort=True)
[docs] def get_projection_matrix(
self,
experiment_ids,
projection_structure_ids=None,
hemisphere_ids=None,
parameter="projection_volume",
dataframe=False,
):
if projection_structure_ids is None:
projection_structure_ids = self.default_structure_ids
unionizes = self.get_structure_unionizes(
experiment_ids,
is_injection=False,
structure_ids=projection_structure_ids,
include_descendants=False,
hemisphere_ids=hemisphere_ids,
)
hemisphere_ids = set(unionizes["hemisphere_id"].values.tolist())
nrows = len(experiment_ids)
ncolumns = len(projection_structure_ids) * len(hemisphere_ids)
matrix = np.empty((nrows, ncolumns))
matrix[:] = np.NAN
row_lookup = {}
for idx, e in enumerate(experiment_ids):
row_lookup[e] = idx
column_lookup = {}
columns = []
cidx = 0
hlabel = {1: "-L", 2: "-R", 3: ""}
acronym_map = self.get_structure_tree().value_map(
lambda x: x["id"], lambda x: x["acronym"]
)
for hid in hemisphere_ids:
for sid in projection_structure_ids:
column_lookup[(hid, sid)] = cidx
label = acronym_map[sid] + hlabel[hid]
columns.append(
{"hemisphere_id": hid, "structure_id": sid, "label": label}
)
cidx += 1
for _, row in unionizes.iterrows():
ridx = row_lookup[row["experiment_id"]]
k = (row["hemisphere_id"], row["structure_id"])
cidx = column_lookup[k]
matrix[ridx, cidx] = row[parameter]
if dataframe:
warnings.warn("dataframe argument is deprecated.")
all_experiments = self.get_experiments(dataframe=True)
rows_df = all_experiments.loc[experiment_ids]
cols_df = pd.DataFrame(columns)
return {"matrix": matrix, "rows": rows_df, "columns": cols_df}
else:
return {
"matrix": matrix,
"rows": experiment_ids,
"columns": columns,
}
[docs] def get_affine_parameters(
self, section_data_set_id, direction="trv", file_name=None
):
"""Extract the parameters of the 3D affine tranformation mapping
this section data set's image-space stack to
CCF-space (or vice-versa).
Parameters
----------
section_data_set_id : int
download the parameters for this data set.
direction : str, optional
Valid options are:
trv : "transform from reference to volume". Maps CCF points
to image space points. If you are resampling data into
CCF, this is the direction you want.
tvr : "transform from volume to reference". Maps image
space points to CCF points.
file_name : str
If provided, store the downloaded file here.
Returns
-------
alignment : numpy.ndarray
4 X 3 matrix. In order to transform a point [X_1, X_2, X_3] run
np.dot([X_1, X_2, X_3, 1], alignment). In
to build a SimpleITK affine transform run:
transform = sitk.AffineTransform(3)
transform.SetParameters(alignment.flatten())
"""
if direction not in ("trv", "tvr"):
raise ValueError(
"invalid direction: {}. direction must be one of tvr,"
"trv".format(direction)
)
file_name = self.get_cache_path(file_name, self.ALIGNMENT3D_KEY)
raw_alignment = self.api.download_alignment3d(
strategy="lazy",
path=file_name,
section_data_set_id=section_data_set_id,
**Cache.cache_json(),
)
alignment_re = re.compile(r"{}_(?P<index>\d+)".format(direction))
alignment = np.zeros((4, 3), dtype=float)
for entry, value in raw_alignment.items():
match = alignment_re.match(entry)
if match is not None:
alignment.flat[int(match.group("index"))] = value
return alignment
[docs] def add_manifest_paths(self, manifest_builder):
"""
Construct a manifest for this Cache class and save it in a file.
Parameters
----------
file_name: string
File location to save the manifest.
"""
manifest_builder = super(
MouseConnectivityCache, self
).add_manifest_paths(manifest_builder)
manifest_builder.add_path(
self.EXPERIMENTS_KEY,
"experiments.json",
parent_key="BASEDIR",
typename="file",
)
manifest_builder.add_path(
self.STRUCTURE_UNIONIZES_KEY,
"experiment_%d/structure_unionizes.csv",
parent_key="BASEDIR",
typename="file",
)
manifest_builder.add_path(
self.INJECTION_DENSITY_KEY,
"experiment_%d/injection_density_%d.nrrd",
parent_key="BASEDIR",
typename="file",
)
manifest_builder.add_path(
self.INJECTION_FRACTION_KEY,
"experiment_%d/injection_fraction_%d.nrrd",
parent_key="BASEDIR",
typename="file",
)
manifest_builder.add_path(
self.DATA_MASK_KEY,
"experiment_%d/data_mask_%d.nrrd",
parent_key="BASEDIR",
typename="file",
)
manifest_builder.add_path(
self.PROJECTION_DENSITY_KEY,
"experiment_%d/projection_density_%d.nrrd",
parent_key="BASEDIR",
typename="file",
)
manifest_builder.add_path(
self.DEFORMATION_FIELD_HEADER_KEY,
"experiment_%d/dfmfld.mhd",
parent_key="BASEDIR",
typename="file",
)
manifest_builder.add_path(
self.DEFORMATION_FIELD_VOXEL_KEY,
"experiment_%d/dfmfld.raw",
parent_key="BASEDIR",
typename="file",
)
manifest_builder.add_path(
self.ALIGNMENT3D_KEY,
"experiment_%d/alignment3d.json",
parent_key="BASEDIR",
typename="file",
)
return manifest_builder