Source code for allensdk.brain_observatory.dff

# Allen Institute Software License - This software license is the 2-clause BSD
# license plus a third clause that prohibits redistribution for commercial
# purposes without further permission.
#
# Copyright 2017. Allen Institute. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Redistributions for commercial purposes are not permitted without the
# Allen Institute's written permission.
# For purposes of this license, commercial purposes is the incorporation of the
# Allen Institute's software into anything for which you will charge fees or
# other compensation. Contact terms@alleninstitute.org for commercial licensing
# opportunities.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
import logging
import os
import argparse
import matplotlib.pyplot as plt
import h5py
import numpy as np
from scipy.ndimage.filters import median_filter

from allensdk.core.brain_observatory_nwb_data_set import \
    BrainObservatoryNwbDataSet

GAUSSIAN_MAD_STD_SCALE = 1.4826


[docs]def movingmode_fast(x, kernelsize, y): """Compute the windowed mode of an array. A running mode is initialized with a histogram of values over the initial kernelsize/2 values. The mode is then updated as the kernel moves by adding and subtracting values from the histogram. Parameters ---------- x : np.ndarray Array to be analyzed kernelsize : int Size of the moving window y : np.ndarray Output array to store the results """ # offset so that the trace is non-negative minval = min(x.min(), 0) if minval < 0: x = x - minval maxval = x.max() # compute a histogram of a half kernel halfsize = int(kernelsize / 2) histo = np.bincount(np.rint(x[:halfsize]).astype( np.uint32), minlength=int(maxval + 2)) # find the mode of the first half kernel mode = np.argmax(histo) # here initial mode is available for m in range(0, halfsize): q = int(round(x[halfsize + m])) histo[q] += 1 if histo[q] > histo[mode]: mode = q y[m] = mode for m in range(halfsize, x.shape[0] - halfsize): p = int(round(x[m - halfsize])) histo[p] -= 1 # need to find possibly new mode value if p == mode: mode = np.argmax(histo) q = int(round(x[m + halfsize])) histo[q] += 1 if histo[q] > histo[mode]: mode = q y[m] = mode for m in range(x.shape[0] - halfsize, x.shape[0]): p = int(round(x[m - halfsize])) histo[p] -= 1 # need to find possibly new mode value if p == mode: mode = np.argmax(histo) y[m] = mode # undo the offset if minval < 0: y += minval return 0
[docs]def movingaverage(x, kernelsize, y): """Compute the windowed average of an array. Parameters ---------- x : np.ndarray Array to be analyzed kernelsize : int Size of the moving window y : np.ndarray Output array to store the results """ halfsize = int(kernelsize / 2) sumkernel = np.sum(x[0:halfsize]) for m in range(0, halfsize): sumkernel = sumkernel + x[m + halfsize] y[m] = sumkernel / (halfsize + m) sumkernel = np.sum(x[0:kernelsize]) for m in range(halfsize, x.shape[0] - halfsize): sumkernel = sumkernel - x[m - halfsize] + x[m + halfsize] y[m] = sumkernel / kernelsize for m in range(x.shape[0] - halfsize, x.shape[0]): sumkernel = sumkernel - x[m - halfsize] y[m] = sumkernel / (halfsize - 1 + (x.shape[0] - m)) return 0
[docs]def plot_onetrace(dff, fc): """Debug plotting function""" qs = np.rint(np.linspace(0, len(dff), 5)).astype(int) dff_max = dff.max() dff_min = dff.min() fc_max = fc.max() fc_min = fc.min() for qi in range(len(qs) - 1): r = qs[qi], qs[qi + 1] frames = np.arange(r[0], r[1]) ax = plt.subplot(len(qs), 1, qi + 1) ax.plot(frames, dff[r[0]:r[1]], 'g') ax.set_ylim(dff_min, dff_max) ax.set_xlim(r[0], r[1]) ax.set_xlabel('frames', fontsize=18) ax.set_ylabel('DF/F', fontsize=18, color='g') ax = ax.twinx() ax.plot(frames, fc[r[0]:r[1]], 'b') ax.set_ylim(fc_min, fc_max) ax.set_xlim(r[0], r[1]) ax.set_ylabel('FC', fontsize=18, color='b') return 0
[docs]def compute_dff_windowed_mode(traces, mode_kernelsize=5400, mean_kernelsize=3000): """Compute dF/F of a set of traces using a low-pass windowed-mode operator. The operation is basically: T_mm = windowed_mean(windowed_mode(T)) T_dff = (T - T_mm) / T_mm Parameters ---------- traces : np.ndarray 2D array of traces to be analyzed. mode_kernelsize : int Window size to use for windowed_mode. mean_kernelsize : int Window size to use for windowed_mean. Returns ------- dff : np.ndarray 2D array of dF/F traces. """ if mode_kernelsize >= traces.shape[1]: mode_kernelsize = traces.shape[1] // 2 logging.warning("Changing mode_kernelsize to " + str(mode_kernelsize)) if mean_kernelsize >= traces.shape[1]: mean_kernelsize = traces.shape[1] // 4 logging.warning("Changing mean_kernelsize to " + str(mean_kernelsize)) if mode_kernelsize == 0 or mean_kernelsize == 0: raise ValueError("Kernel length is 0!") logging.debug("trace matrix shape: %d %d" % (traces.shape[0], traces.shape[1])) modeline = np.zeros(traces.shape[1]) modelineLP = np.zeros(traces.shape[1]) dff = np.zeros((traces.shape[0], traces.shape[1])) logging.debug("computing df/f") for n in range(0, traces.shape[0]): if np.any(np.isnan(traces[n])): logging.warning( "trace for roi %d contains NaNs, setting to NaN", n) dff[n, :] = np.nan continue movingmode_fast(traces[n, :], mode_kernelsize, modeline[:]) movingaverage(modeline[:], mean_kernelsize, modelineLP[:]) dff[n, :] = (traces[n, :] - modelineLP[:]) / modelineLP[:] logging.debug("finished trace %d/%d" % (n + 1, traces.shape[0])) return dff
[docs]def compute_dff_windowed_median(traces, median_kernel_long=5401, median_kernel_short=101, noise_stds=None, n_small_baseline_frames=None, **kwargs): """Compute dF/F of a set of traces with median filter detrending. The operation is basically: T_long = windowed_median(T) # long timescale kernel T_dff1 = (T - T_long) / elementwise_max(T_long, noise_std(T)) T_short = windowed_median(T_dff1) # short timescale kernel T_dff = T_dff1 - elementwise_min(T_short, 2.5*noise_std(T_dff1)) Parameters ---------- traces : np.ndarray 2D array of traces to be analyzed. median_kernel_long : int Window size to use for long timescale median detrending. median_kernel_short : int Window size to use for short timescale median detrending. noise_stds : list List that will contain noise_std(T_dff1) for each trace. The value for each trace will be appended to the list if provided. n_small_baseline_frames : list List that will contain the number of frames for each trace where the long-timescale median window is less than noise_std(T). The value for each trace will be appended to the list if provided. kwargs: Additional keyword arguments are passed to :func:`noise_std` . Returns ------- dff : np.ndarray 2D array of dF/F traces. """ _check_kernel(median_kernel_long, traces.shape[1]) _check_kernel(median_kernel_short, traces.shape[1]) dff_traces = np.copy(traces) for dff in dff_traces: sigma_f = noise_std(dff, **kwargs) # long timescale median filter for baseline subtraction tf = median_filter(dff, median_kernel_long, mode='constant') dff -= tf dff /= np.maximum(tf, sigma_f) if n_small_baseline_frames is not None: n_small_baseline_frames.append(np.sum(tf <= sigma_f)) sigma_dff = noise_std(dff, **kwargs) if noise_stds is not None: noise_stds.append(sigma_dff) # short timescale detrending tf = median_filter(dff, median_kernel_short, mode='constant') tf = np.minimum(tf, 2.5*sigma_dff) dff -= tf return dff_traces
def _check_kernel(kernel_size, data_size): if kernel_size % 2 == 0 or kernel_size <= 0 or kernel_size >= data_size: raise ValueError("Invalid kernel length {} for data length {}. Kernel " "length must be positive and odd, and less than data " "length.".format(kernel_size, data_size))
[docs]def noise_std(x, noise_kernel_length=31, positive_peak_scale=1.5, outlier_std_scale=2.5): """Robust estimate of the standard deviation of the trace noise.""" _check_kernel(noise_kernel_length, len(x)) if any(np.isnan(x)): return np.NaN x = x - median_filter(x, noise_kernel_length, mode='constant') # first pass removing big pos peak outliers x = x[x < positive_peak_scale*np.abs(x.min())] rstd = robust_std(x) # second pass removing remaining pos and neg peak outliers x = x[abs(x) < outlier_std_scale*rstd] return robust_std(x)
[docs]def robust_std(x): """Robust estimate of standard deviation. Estimate of the standard deviation using the median absolute deviation of x. """ median_absolute_deviation = np.median(np.abs(x - np.median(x))) return GAUSSIAN_MAD_STD_SCALE*median_absolute_deviation
[docs]def calculate_dff(traces, dff_computation_cb=None, save_plot_dir=None): """Apply dF/F computation to a set of traces. The default computation method is :func:`compute_dff_windowed_median` using default window parameters. Parameters ---------- traces : np.ndarray 2D array of traces to be analyzed. dff_computation_cb : function Function that takes traces as an argument and returns an array of the same shape that is the calculated dF/F. save_plot_dir : str Directory to save dF/F plots to. By default no plots are saved. Returns ------- dff : np.ndarray 2D array of dF/F traces. """ if dff_computation_cb is None: dff_computation_cb = compute_dff_windowed_median dff = dff_computation_cb(traces) if save_plot_dir is not None: if not os.path.exists(save_plot_dir): os.makedirs(save_plot_dir) for n in range(0, traces.shape[0]): if np.any(np.isnan(traces[n])): continue fig = plt.figure(figsize=(150, 40)) plot_onetrace(dff[n, :], traces[n, :]) plt.title('ROI ' + str(n) + ' ', fontsize=18) fig.savefig(os.path.join(save_plot_dir, 'dff_%d.png' % n), orientation='landscape') plt.close(fig) return dff
[docs]def main(): parser = argparse.ArgumentParser() parser.add_argument("input_h5") parser.add_argument("output_h5") parser.add_argument("--plot_dir") parser.add_argument("--log_level", default=logging.INFO) args = parser.parse_args() logging.getLogger().setLevel(args.log_level) # read from "data" if args.input_h5.endswith("nwb"): timestamps, traces = BrainObservatoryNwbDataSet( args.input_h5).get_corrected_fluorescence_traces() else: input_h5 = h5py.File(args.input_h5, "r") traces = input_h5["data"][()] input_h5.close() dff = calculate_dff(traces, save_plot_dir=args.plot_dir) # write to "data" output_h5 = h5py.File(args.output_h5, "w") output_h5["data"] = dff output_h5.close()
if __name__ == "__main__": main()