#!/usr/bin/python
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
import logging
import numpy as np
from allensdk.brain_observatory.r_neuropil import estimate_contamination_ratios
import allensdk.internal.core.lims_utilities as lu
import h5py
import json
import copy
import os
import sys
import argparse
import shutil

from allensdk.internal.core.lims_pipeline_module import PipelineModule, run_module

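# Example input JSON for this module -- a minimal sketch with hypothetical
# paths; the keys mirror the input_data dictionary built in debug() below:
#
#   {
#       "roi_trace_file": "/path/to/demixed_traces.h5",
#       "neuropil_trace_file": "/path/to/neuropil_traces.h5",
#       "storage_directory": "/path/to/output_dir"
#   }
#
# On success the module writes neuropil_correction.h5 (datasets "r", "RMSE",
# "FC", "roi_names", and per-trace "r_vals/<n>") into storage_directory and
# echoes the input JSON back with an added "neuropil_correction" key.
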
def debug(experiment_id, local=False):
    OUTPUT_DIRECTORY = "/data/informatics/CAM/neuropil"
    SDK_PATH = "/data/informatics/CAM/neuropil/allensdk"
    SCRIPT = ("/data/informatics/CAM/neuropil/allensdk/allensdk/"
              "internal/pipeline_modules/run_neuropil_correction.py")

    exp = lu.query("select * from ophys_experiments where id = %d" % experiment_id)[0]
    sd = exp["storage_directory"]

    # prefer a locally staged demixed trace file if one exists
    test_file = "/data/informatics/CAM/demix/%d/demixed_traces.h5" % experiment_id
    if os.path.exists(test_file):
        roi_trace_file = test_file
    else:
        roi_trace_file = os.path.join(sd, "demix",
                                      "%d_demixed_traces.h5" % experiment_id)

    exp_dir = os.path.join(OUTPUT_DIRECTORY, str(experiment_id))

    input_data = dict(
        roi_trace_file=roi_trace_file,
        neuropil_trace_file=os.path.join(sd, "processed", "neuropil_traces.h5"),
        storage_directory=exp_dir)

    run_module(SCRIPT,
               input_data,
               exp_dir,
               sdk_path=SDK_PATH,
               pbs=dict(vmem=160,
                        job_name="np_%d" % experiment_id,
                        walltime="36:00:00"),
               local=local,
               optional_args=['--log-level', 'DEBUG'])

def debug_plot(file_name, roi_trace, neuropil_trace, corrected_trace, r,
               r_vals=None, err_vals=None):
    fig = plt.figure(figsize=(15, 10))

    # top panel: raw, corrected, and neuropil traces
    ax = fig.add_subplot(211)
    ax.plot(roi_trace, 'r', label="raw")
    ax.plot(corrected_trace, 'b', label="fc")
    ax.plot(neuropil_trace, 'g', label="neuropil")
    ax.set_xlim(0, roi_trace.size)
    ax.set_title('raw(%.02f, %.02f) fc(%.02f, %.02f) r(%f)' % (
        roi_trace.min(), roi_trace.max(),
        corrected_trace.min(), corrected_trace.max(), r))
    ax.legend()

    # bottom panel: error as a function of candidate r values
    if r_vals is not None:
        ax = fig.add_subplot(212)
        ax.plot(r_vals, err_vals, "o")

    plt.savefig(file_name)
    plt.close()

def adjust_r_for_negativity(r, F_C, F_M, F_N):
    # this function is no longer used, but leaving it here just in case

    # loop through all of the negative spots and pick r to fix them
    neg_is = np.argwhere(F_C < 0)
    if neg_is.size > 0:
        logging.debug("Correcting for negative trace, starting with r = %f", r)

        for i in neg_is:
            if F_C[i] >= 0:
                continue

            # r_new = (F_C[i] + r_old * F_N[i]) / F_N[i]
            r = F_M[i] / F_N[i]
            F_C = F_M - r * F_N
            logging.debug("  updated r to %f", r)

        # if there is still a negative spot, it's off by some tiny epsilon.
        # step r down by delta_r increments until we find one that works.
        delta_r = -1e-5
        while F_C.min() < 0 and r >= 0.0:
            r += delta_r
            F_C = F_M - r * F_N
            logging.debug("  stepped r to %f", r)

        logging.debug("  finished with r = %f", r)

    return r

def main():
    module = PipelineModule()
    args = module.args

    jin = module.input_data()

    ########################################################################
    # prelude -- get processing metadata

    trace_file = jin["roi_trace_file"]
    neuropil_file = jin["neuropil_trace_file"]
    storage_dir = jin["storage_directory"]

    plot_dir = os.path.join(storage_dir, "neuropil_subtraction_plots")

    if os.path.exists(plot_dir):
        shutil.rmtree(plot_dir)

    try:
        os.makedirs(plot_dir)
    except:
        pass

    logging.info("Neuropil correcting '%s'", trace_file)

    ########################################################################
    # process data

    try:
        roi_traces = h5py.File(trace_file, "r")
    except:
        logging.error("Error: unable to open ROI trace file '%s'", trace_file)
        raise

    try:
        neuropil_traces = h5py.File(neuropil_file, "r")
    except:
        logging.error("Error: unable to open neuropil trace file '%s'",
                      neuropil_file)
        raise

    # get number of traces, length, etc.
    num_traces, T = roi_traces['data'].shape
    T_orig = T
    T_cross_val = int(T / 2)
    if T - T_cross_val > T_cross_val:
        T = T - 1

    # make sure that ROI and neuropil trace files are organized the same
    n_id = neuropil_traces["roi_names"][:].astype(str)
    r_id = roi_traces["roi_names"][:].astype(str)
    logging.info("Processing %d traces", len(n_id))
    assert len(n_id) == len(r_id), "Input trace files are not aligned (ROI count)"
    for i in range(len(n_id)):
        assert n_id[i] == r_id[i], "Input trace files are not aligned (ROI IDs)"

    # initialize storage variables and analysis routine
    r_list = [None] * num_traces
    RMSE_list = [-1] * num_traces
    roi_names = n_id
    corrected = np.zeros((num_traces, T_orig))
    r_vals = [None] * num_traces

    for n in range(num_traces):
        roi = roi_traces['data'][n]
        neuropil = neuropil_traces['data'][n]

        if np.any(np.isnan(neuropil)):
            logging.warning("neuropil trace for roi %d contains NaNs, skipping", n)
            continue

        if np.any(np.isnan(roi)):
            logging.warning("roi trace for roi %d contains NaNs, skipping", n)
            continue

        r = None

        logging.info("Correcting trace %d (roi %s)", n, str(n_id[n]))
        results = estimate_contamination_ratios(roi, neuropil)
        logging.info("r=%f err=%f it=%d", results["r"], results["err"],
                     results["it"])

        r = results["r"]
        fc = roi - r * neuropil
        RMSE_list[n] = results["err"]
        r_vals[n] = results["r_vals"]

        debug_plot(os.path.join(plot_dir, "initial_%04d.png" % n),
                   roi, neuropil, fc, r,
                   results["r_vals"], results["err_vals"])

        # mean of the corrected trace must be positive
        if fc.mean() > 0:
            r_list[n] = r
            corrected[n, :] = fc
        else:
            logging.warning("fc has negative baseline, skipping this r value")

    # compute mean valid r value
    r_mean = np.array([r for r in r_list if r is not None]).mean()

    # fill in empty r values
    for n in range(num_traces):
        roi = roi_traces['data'][n]
        neuropil = neuropil_traces['data'][n]

        if r_list[n] is None:
            logging.warning("Error estimating r for trace %d. Setting to zero.", n)
            r_list[n] = 0
            corrected[n, :] = roi

        # save a debug plot
        debug_plot(os.path.join(plot_dir, "final_%04d.png" % n),
                   roi, neuropil, corrected[n, :], r_list[n])

        # one last sanity check
        eps = -0.0001
        if np.mean(corrected[n, :]) < eps:
            raise Exception("Trace %d baseline is still negative after correction" % n)

        if r_list[n] < 0.0:
            raise Exception("Trace %d ended with negative r" % n)

    ########################################################################
    # write out processed data

    try:
        savefile = os.path.join(storage_dir, "neuropil_correction.h5")
        hf = h5py.File(savefile, 'w')
        hf.create_dataset("r", data=r_list)
        hf.create_dataset("RMSE", data=RMSE_list)
        hf.create_dataset("FC", data=corrected, compression="gzip")
        hf.create_dataset("roi_names", data=roi_names.astype(np.string_))

        for n in range(num_traces):
            r = r_vals[n]
            if r is not None:
                hf.create_dataset("r_vals/%d" % n, data=r)
        hf.close()
    except:
        logging.error("Error creating output h5 file")
        raise

    roi_traces.close()
    neuropil_traces.close()

    jout = copy.copy(jin)
    jout["neuropil_correction"] = savefile
    module.write_output_data(jout)

    logging.info("finished")

if __name__ == "__main__":
    main()
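
# A minimal sketch of invoking this module from the command line, assuming the
# usual PipelineModule convention of positional input/output JSON paths (the
# exact arguments are an assumption; only --log-level is shown in debug() above):
#
#   python run_neuropil_correction.py input.json output.json --log-level DEBUG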