Source code for allensdk.brain_observatory.natural_scenes

# Allen Institute Software License - This software license is the 2-clause BSD
# license plus a third clause that prohibits redistribution for commercial
# purposes without further permission.
#
# Copyright 2016-2017. Allen Institute. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Redistributions for commercial purposes are not permitted without the
# Allen Institute's written permission.
# For purposes of this license, commercial purposes is the incorporation of the
# Allen Institute's software into anything for which you will charge fees or
# other compensation. Contact terms@alleninstitute.org for commercial licensing
# opportunities.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
import scipy.stats as st
import numpy as np
import pandas as pd
from .stimulus_analysis import StimulusAnalysis
import logging
import h5py
from . import observatory_plots as oplots
from . import circle_plots as cplots
from .brain_observatory_exceptions import MissingStimulusException


[docs]class NaturalScenes(StimulusAnalysis):
    """ Perform tuning analysis specific to natural scenes stimulus.

    Parameters
    ----------
    data_set: BrainObservatoryNwbDataSet object
    """

    _log = logging.getLogger('allensdk.brain_observatory.natural_scenes')

    def __init__(self, data_set, **kwargs):
        super(NaturalScenes, self).__init__(data_set, **kwargs)

        self._number_scenes = StimulusAnalysis._PRELOAD
        self._sweeplength = StimulusAnalysis._PRELOAD
        self._interlength = StimulusAnalysis._PRELOAD
        self._extralength = StimulusAnalysis._PRELOAD

    @property
    def number_scenes(self):
        if self._number_scenes is StimulusAnalysis._PRELOAD:
            self.populate_stimulus_table()

        return self._number_scenes

    @property
    def sweeplength(self):
        if self._sweeplength is StimulusAnalysis._PRELOAD:
            self.populate_stimulus_table()

        return self._sweeplength

    @property
    def interlength(self):
        if self._interlength is StimulusAnalysis._PRELOAD:
            self.populate_stimulus_table()

        return self._interlength

    @property
    def extralength(self):
        if self._extralength is StimulusAnalysis._PRELOAD:
            self.populate_stimulus_table()

        return self._extralength

[docs]    def populate_stimulus_table(self):
        self._stim_table = self.data_set.get_stimulus_table('natural_scenes')
        self._number_scenes = len(np.unique(self._stim_table.frame))
        self._sweeplength = (
                self._stim_table.end.iloc[1] -
                self._stim_table.start.iloc[1])
        self._interlength = 4 * self._sweeplength
        self._extralength = self._sweeplength

[docs]    def get_response(self):
        ''' Computes the mean response for each cell to each stimulus
        condition.  Return is
        a (# scenes, # cells, 3) np.ndarray.  The final dimension
        contains the mean response to the condition (index 0), standard
        error of the mean of the response
        to the condition (index 1), and the number of trials with a
        significant (p < 0.05) response
        to that condition (index 2).

        Returns
        -------
        Numpy array storing mean responses.
        '''
        NaturalScenes._log.info("Calculating mean responses")

        response = np.empty((self.number_scenes, self.numbercells + 1, 3))

        def ptest(x):
            return len(np.where(x < (0.05 / (self.number_scenes - 1)))[0])

        for ns in range(self.number_scenes):
            subset_response = self.mean_sweep_response[
                self.stim_table.frame == (ns - 1)]
            subset_pval = self.pval[self.stim_table.frame == (ns - 1)]
            response[ns, :, 0] = subset_response.mean(axis=0)
            response[ns, :, 1] = subset_response.std(
                axis=0) / np.sqrt(len(subset_response))
            response[ns, :, 2] = subset_pval.apply(ptest, axis=0)

        return response

[docs]    def get_peak(self):
        ''' Computes metrics about peak response condition for each cell.

        Returns
        -------
        Pandas data frame with the following fields ('_ns' suffix is for
        natural scene):
            * scene_ns (scene number)
            * reliability_ns
            * peak_dff_ns (peak dF/F)
            * ptest_ns
            * p_run_ns
            * run_modulation_ns
            * time_to_peak_ns
        '''
        NaturalScenes._log.info('Calculating peak response properties')
        peak = pd.DataFrame(index=range(self.numbercells), columns=(
            'scene_ns', 'reliability_ns', 'peak_dff_ns',
            'ptest_ns', 'p_run_ns', 'run_modulation_ns',
            'time_to_peak_ns',
            'cell_specimen_id', 'image_selectivity_ns'))
        cids = self.data_set.get_cell_specimen_ids()

        for nc in range(self.numbercells):
            nsp = np.argmax(self.response[1:, nc, 0])
            peak.cell_specimen_id.iloc[nc] = cids[nc]
            peak.scene_ns[nc] = nsp
            #            peak.response_reliability_ns[nc] = self.response[
            #                nsp + 1, nc, 2] / 0.50  # assume 50 trials
            peak.peak_dff_ns[nc] = self.response[nsp + 1, nc, 0]
            #            subset = self.mean_sweep_response[
            #            self.stim_table.frame == nsp]
            #            subset_stat = subset[subset.dx < 2]
            #            subset_run = subset[subset.dx >= 2]
            #            if (len(subset_run) > 5) & (len(subset_stat) > 5):
            #                (_, peak.p_run_ns[nc]) = st.ks_2samp(
            #                    subset_run[str(nc)], subset_stat[str(nc)])
            #                peak.run_modulation_ns[nc] = subset_run[
            #                    str(nc)].mean() / subset_stat[str(nc)].mean()
            #            else:
            #                peak.p_run_ns[nc] = np.NaN
            #                peak.run_modulation_ns[nc] = np.NaN
            groups = []
            for im in range(self.number_scenes):
                subset = self.mean_sweep_response[
                    self.stim_table.frame == (im - 1)]
                groups.append(subset[str(nc)].values)
            (_, peak.ptest_ns[nc]) = st.f_oneway(*groups)
            test = self.sweep_response[
                self.stim_table.frame == nsp][str(nc)].mean()
            peak.time_to_peak_ns[nc] = \
                (np.argmax(test) - self.interlength) / self.acquisition_rate

            # running modulation
            subset = self.mean_sweep_response[self.stim_table.frame == nsp]
            subset_run = subset[subset.dx >= 1]
            subset_stat = subset[subset.dx < 1]
            if (len(subset_run) > 4) & (len(subset_stat) > 4):
                (_, peak.p_run_ns.iloc[nc]) = st.ttest_ind(subset_run[str(nc)],
                                                           subset_stat[
                                                               str(nc)],
                                                           equal_var=False)

                if subset_run[str(nc)].mean() > subset_stat[str(nc)].mean():
                    peak.run_modulation_ns.iloc[nc] = (subset_run[
                                                           str(nc)].mean() -
                                                       subset_stat[
                                                           str(nc)].mean()) \
                                                      / np.abs(
                        subset_run[str(nc)].mean())
                elif subset_run[str(nc)].mean() < subset_stat[str(nc)].mean():
                    peak.run_modulation_ns.iloc[nc] = \
                        (-1 * ((subset_stat[str(nc)].mean() -
                                subset_run[str(nc)].mean()) /
                               np.abs(subset_stat[str(nc)].mean())))
            else:
                peak.p_run_ns.iloc[nc] = np.NaN
                peak.run_modulation_ns.iloc[nc] = np.NaN

                # reliability
            subset = self.sweep_response[self.stim_table.frame == nsp]
            corr_matrix = np.empty((len(subset), len(subset)))
            for i in range(len(subset)):
                for j in range(len(subset)):
                    r, p = st.pearsonr(subset[str(nc)].iloc[i][28:42],
                                       subset[str(nc)].iloc[j][28:42])
                    corr_matrix[i, j] = r
            mask = np.ones((len(subset), len(subset)))
            for i in range(len(subset)):
                for j in range(len(subset)):
                    if i >= j:
                        mask[i, j] = np.NaN
            corr_matrix *= mask
            peak.reliability_ns.iloc[nc] = np.nanmean(corr_matrix)

            # image selectivity
            fmin = self.response[1:, nc, 0].min()
            fmax = self.response[1:, nc, 0].max()
            rtj = np.empty((1000, 1))
            for j in range(1000):
                thresh = fmin + j * ((fmax - fmin) / 1000.)
                theta = np.empty((118, 1))
                for im in range(118):
                    # im+1 to only look at
                    if self.response[im + 1, nc, 0] > thresh:
                        # images, not blanksweep
                        theta[im] = 1
                    else:
                        theta[im] = 0
                rtj[j] = theta.mean()

            biga = rtj.mean()
            bigs = 1 - (2 * biga)
            peak.image_selectivity_ns.iloc[nc] = bigs

        return peak

[docs]    def plot_time_to_peak(self,
                          p_value_max=oplots.P_VALUE_MAX,
                          color_map=oplots.STIMULUS_COLOR_MAP):
        stimulus_table = self.data_set.get_stimulus_table('natural_scenes')

        resps = []

        for index, row in self.peak.iterrows():
            mean_response = \
                self.sweep_response.loc[stimulus_table.frame == row.scene_ns][
                    str(index)].mean()
            resps.append(
                (mean_response - mean_response.mean() / mean_response.std()))

        mean_responses = np.array(resps)

        sorted_table = self.peak[self.peak.ptest_ns < p_value_max].sort_values(
            'time_to_peak_ns')
        cell_order = sorted_table.index

        # time to peak is relative to stimulus start in seconds
        ttps = sorted_table.time_to_peak_ns.values + self.interlength / \
            self.acquisition_rate
        msrs_sorted = mean_responses[cell_order, :]

        oplots.plot_time_to_peak(
            msrs_sorted,
            ttps,
            0,
            (2 * self.interlength + self.sweeplength) / self.acquisition_rate,
            self.interlength / self.acquisition_rate,
            (self.interlength + self.sweeplength) / self.acquisition_rate,
            color_map)

[docs]    def open_corona_plot(self, cell_specimen_id=None, cell_index=None):
        cell_index = self.row_from_cell_id(cell_specimen_id, cell_index)

        df = self.mean_sweep_response[str(cell_index)]
        data = df.values

        st = self.data_set.get_stimulus_table('natural_scenes')
        mask = st[st.frame >= 0].index

        cmin = self.response[0, cell_index, 0]
        cmax = max(cmin, data.mean() + data.std() * 3)

        cp = cplots.CoronaPlotter()
        cp.plot(st.frame.loc[mask].values,
                data=df.loc[mask].values,
                clim=[cmin, cmax])
        cp.show_arrow()
        cp.show_circle()

[docs]    def reshape_response_array(self):
        '''
        :return: response array in cells x stim x repetition for noise
        correlations
        '''

        mean_sweep_response = \
            self.mean_sweep_response.values[:, :self.numbercells]

        stim_table = self.stim_table
        frames = np.unique(stim_table.frame.values)

        reps = [len(np.where(stim_table.frame.values == frame)[0]) for frame in
                frames]

        # just in case there are different numbers of repetitions
        Nreps = min(reps)

        response_new = np.zeros((self.numbercells, self.number_scenes),
                                dtype='object')
        for i, frame in enumerate(frames):
            ind = np.where(stim_table.frame.values == frame)[0][:Nreps]
            for c in range(self.numbercells):
                response_new[c, i] = mean_sweep_response[ind, c]

        return response_new

[docs]    def get_signal_correlation(self, corr='spearman'):
        logging.debug("Calculating signal correlations")

        response = self.response[:, :, 0].T
        response = response[:self.numbercells, :]
        N, Nstim = response.shape

        signal_corr = np.zeros((N, N))
        signal_p = np.empty((N, N))
        if corr == 'pearson':
            for i in range(N):
                for j in range(i, N):  # matrix is symmetric
                    signal_corr[i, j], signal_p[i, j] = st.pearsonr(
                        response[i], response[j])

        elif corr == 'spearman':
            for i in range(N):
                for j in range(i, N):  # matrix is symmetric
                    signal_corr[i, j], signal_p[i, j] = st.spearmanr(
                        response[i], response[j])

        else:
            raise Exception('correlation should be pearson or spearman')

        # fill in lower triangle
        signal_corr = (
                np.triu(signal_corr) +
                np.triu(signal_corr, 1).T)

        # fill in lower triangle
        signal_p = (
                np.triu(signal_p) +
                np.triu(signal_p, 1).T)

        return signal_corr, signal_p

[docs]    def get_representational_similarity(self, corr='spearman'):
        logging.debug("Calculating representational similarity")

        response = self.response[:, :, 0]
        response = response[:, :self.numbercells]
        Nstim, N = response.shape

        rep_sim = np.zeros((Nstim, Nstim))
        rep_sim_p = np.empty((Nstim, Nstim))
        if corr == 'pearson':
            for i in range(Nstim):
                for j in range(i, Nstim):  # matrix is symmetric
                    rep_sim[i, j], rep_sim_p[i, j] = st.pearsonr(response[i],
                                                                 response[j])

        elif corr == 'spearman':
            for i in range(Nstim):
                for j in range(i, Nstim):  # matrix is symmetric
                    rep_sim[i, j], rep_sim_p[i, j] = st.spearmanr(response[i],
                                                                  response[j])

        else:
            raise Exception('correlation should be pearson or spearman')

        rep_sim = np.triu(rep_sim) + np.triu(rep_sim,
                                             1).T  # fill in lower triangle
        rep_sim_p = np.triu(rep_sim_p) + np.triu(rep_sim_p,
                                                 1).T  # fill in lower triangle

        return rep_sim, rep_sim_p

[docs]    def get_noise_correlation(self, corr='spearman'):
        logging.debug("Calculating noise correlations")

        response = self.reshape_response_array()
        noise_corr = np.zeros(
            (self.numbercells, self.numbercells, self.number_scenes))
        noise_corr_p = np.zeros(
            (self.numbercells, self.numbercells, self.number_scenes))

        if corr == 'pearson':
            for k in range(self.number_scenes):
                for i in range(self.numbercells):
                    for j in range(i, self.numbercells):
                        noise_corr[i, j, k], noise_corr_p[
                            i, j, k] = st.pearsonr(response[i, k],
                                                   response[j, k])

                noise_corr[:, :, k] = np.triu(noise_corr[:, :, k]) + np.triu(
                    noise_corr[:, :, k], 1).T
                noise_corr_p[:, :, k] = np.triu(
                    noise_corr_p[:, :, k]) + np.triu(noise_corr_p[:, :, k],
                                                     1).T

        elif corr == 'spearman':
            for k in range(self.number_scenes):
                for i in range(self.numbercells):
                    for j in range(i, self.numbercells):
                        noise_corr[i, j, k], noise_corr_p[
                            i, j, k] = st.spearmanr(response[i, k],
                                                    response[j, k])

                noise_corr[:, :, k] = np.triu(noise_corr[:, :, k]) + np.triu(
                    noise_corr[:, :, k], 1).T
                noise_corr_p[:, :, k] = np.triu(
                    noise_corr_p[:, :, k]) + np.triu(noise_corr_p[:, :, k],
                                                     1).T

        else:
            raise Exception('correlation should be pearson or spearman')

        return noise_corr, noise_corr_p

[docs]    @staticmethod
    def from_analysis_file(data_set, analysis_file):
        ns = NaturalScenes(data_set)
        ns.populate_stimulus_table()

        try:
            ns._sweep_response = pd.read_hdf(analysis_file,
                                             "analysis/sweep_response_ns")
            ns._mean_sweep_response = pd.read_hdf(
                analysis_file, "analysis/mean_sweep_response_ns")
            ns._peak = pd.read_hdf(analysis_file, "analysis/peak")

            with h5py.File(analysis_file, "r") as f:
                ns._response = f["analysis/response_ns"][()]
                ns._binned_dx_sp = f["analysis/binned_dx_sp"][()]
                ns._binned_cells_sp = f["analysis/binned_cells_sp"][()]
                ns._binned_dx_vis = f["analysis/binned_dx_vis"][()]
                ns._binned_cells_vis = f["analysis/binned_cells_vis"][()]

                if "analysis/noise_corr_ns" in f:
                    ns.noise_correlation = f["analysis/noise_corr_ns"][()]
                if "analysis/signal_corr_ns" in f:
                    ns.signal_correlation = f["analysis/signal_corr_ns"][()]
                if "analysis/rep_similarity_ns" in f:
                    ns.representational_similarity = f[
                        "analysis/rep_similarity_ns"][()]

        except Exception as e:
            raise MissingStimulusException(e.args)

        return ns
Source code for allensdk.brain_observatory.natural_scenes

Contents

Questions