#!/usr/bin/python
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
import logging
import numpy as np
from allensdk.brain_observatory.r_neuropil import estimate_contamination_ratios
import allensdk.internal.core.lims_utilities as lu
import h5py
import json
import copy
import os
import sys
import argparse
import shutil
from allensdk.internal.core.lims_pipeline_module import PipelineModule, run_module
[docs]def debug(experiment_id, local=False):
OUTPUT_DIRECTORY = "/data/informatics/CAM/neuropil"
SDK_PATH = "/data/informatics/CAM/neuropil/allensdk"
SCRIPT = "/data/informatics/CAM/neuropil/allensdk/allensdk/internal/pipeline_modules/run_neuropil_correction.py"
exp = lu.query("select * from ophys_experiments where id = %d" % experiment_id)[0]
sd = exp["storage_directory"]
test_file = "/data/informatics/CAM/demix/%d/demixed_traces.h5" % experiment_id
if os.path.exists(test_file):
roi_trace_file = test_file
else:
roi_trace_file = os.path.join(sd, "demix", "%d_demixed_traces.h5" % experiment_id)
exp_dir = os.path.join(OUTPUT_DIRECTORY, str(experiment_id))
input_data = dict(
roi_trace_file = roi_trace_file,
neuropil_trace_file = os.path.join(sd, "processed", "neuropil_traces.h5"),
storage_directory = exp_dir
)
run_module(SCRIPT,
input_data,
exp_dir,
sdk_path=SDK_PATH,
pbs=dict(vmem=160,
job_name="np_%d"% experiment_id,
walltime="36:00:00"),
local=local,
optional_args=['--log-level','DEBUG'])
[docs]def debug_plot(file_name, roi_trace, neuropil_trace, corrected_trace, r, r_vals=None, err_vals=None):
fig = plt.figure(figsize=(15,10))
ax = fig.add_subplot(211)
ax.plot(roi_trace,'r', label="raw")
ax.plot(corrected_trace,'b', label="fc")
ax.plot(neuropil_trace,'g', label="neuropil")
ax.set_xlim(0,roi_trace.size)
ax.set_title('raw(%.02f, %.02f) fc(%.02f, %.02f) r(%f)' % (roi_trace.min(), roi_trace.max(), corrected_trace.min(), corrected_trace.max(), r))
ax.legend()
if r_vals is not None:
ax = fig.add_subplot(212)
ax.plot(r_vals, err_vals, "o")
plt.savefig(file_name)
plt.close()
[docs]def adjust_r_for_negativity(r, F_C, F_M, F_N):
# this function is no longer used, but leaving it here just in case
# loop through all of the negative spots and pick r to fix them
neg_is = np.argwhere(F_C < 0)
if neg_is.size > 0:
logging.debug("Correcting for negative trace, starting with r = %f", r)
for i in neg_is:
if F_C[i] >= 0:
continue
# r_new = (F_C[i] + r_old * F_N[i]) / F_N[i]
r = F_M[i] / F_N[i]
F_C = F_M - r * F_N
logging.debug(" updated r to %f", r)
# if there is still a negative spot, it's off by some tiny epsilon.
# step r down by delta_r increments until we find one that works.
delta_r = -1e-5
while F_C.min() < 0 and r >= 0.0:
r += delta_r
F_C = F_M - r * F_N
logging.debug(" stepped r to %f", r)
logging.debug(" finished with r = %f", r)
return r
[docs]def main():
module = PipelineModule()
args = module.args
jin = module.input_data()
########################################################################
# prelude -- get processing metadata
trace_file = jin["roi_trace_file"]
neuropil_file = jin["neuropil_trace_file"]
storage_dir = jin["storage_directory"]
plot_dir = os.path.join(storage_dir, "neuropil_subtraction_plots")
if os.path.exists(plot_dir):
shutil.rmtree(plot_dir)
try:
os.makedirs(plot_dir)
except:
pass
logging.info("Neuropil correcting '%s'", trace_file)
########################################################################
# process data
try:
roi_traces = h5py.File(trace_file, "r")
except:
logging.error("Error: unable to open ROI trace file '%s'", trace_file)
raise
try:
neuropil_traces = h5py.File(neuropil_file, "r")
except:
logging.error("Error: unable to open neuropil trace file '%s'", neuropil_file)
raise
'''
get number of traces, length, etc.
'''
num_traces, T = roi_traces['data'].shape
T_orig = T
T_cross_val = int(T/2)
if (T - T_cross_val > T_cross_val):
T = T - 1
# make sure that ROI and neuropil trace files are organized the same
n_id = neuropil_traces["roi_names"][:].astype(str)
r_id = roi_traces["roi_names"][:].astype(str)
logging.info("Processing %d traces", len(n_id))
assert len(n_id) == len(r_id), "Input trace files are not aligned (ROI count)"
for i in range(len(n_id)):
assert n_id[i] == r_id[i], "Input trace files are not aligned (ROI IDs)"
'''
initialize storage variables and analysis routine
'''
r_list = [ None ] * num_traces
RMSE_list = [ -1 ] * num_traces
roi_names = n_id
corrected = np.zeros((num_traces, T_orig))
r_vals = [ None ] * num_traces
for n in range(num_traces):
roi = roi_traces['data'][n]
neuropil = neuropil_traces['data'][n]
if np.any(np.isnan(neuropil)):
logging.warning("neuropil trace for roi %d contains NaNs, skipping", n)
continue
if np.any(np.isnan(roi)):
logging.warning("roi trace for roi %d contains NaNs, skipping", n)
continue
r = None
logging.info("Correcting trace %d (roi %s)", n, str(n_id[n]))
results = estimate_contamination_ratios(roi, neuropil)
logging.info("r=%f err=%f it=%d", results["r"], results["err"], results["it"])
r = results["r"]
fc = roi - r * neuropil
RMSE_list[n] = results["err"]
r_vals[n] = results["r_vals"]
debug_plot(os.path.join(plot_dir, "initial_%04d.png" % n),
roi, neuropil, fc, r, results["r_vals"], results["err_vals"])
# mean of the corrected trace must be positive
if fc.mean() > 0:
r_list[n] = r
corrected[n,:] = fc
else:
logging.warning("fc has negative baseline, skipping this r value")
# compute mean valid r value
r_mean = np.array([r for r in r_list if r is not None ]).mean()
# fill in empty r values
for n in range(num_traces):
roi = roi_traces['data'][n]
neuropil = neuropil_traces['data'][n]
if r_list[n] is None:
logging.warning("Error estimated r for trace %d. Setting to zero.", n)
r_list[n] = 0
corrected[n,:] = roi
# save a debug plot
debug_plot(os.path.join(plot_dir, "final_%04d.png" % n),
roi, neuropil, corrected[n,:], r_list[n])
# one last sanity check
eps = -0.0001
if np.mean(corrected[n,:]) < eps:
raise Exception("Trace %d baseline is still negative value after correction" % n)
if r_list[n] < 0.0:
raise Exception("Trace %d ended with negative r" % n)
########################################################################
# write out processed data
try:
savefile = os.path.join(storage_dir, "neuropil_correction.h5")
hf = h5py.File(savefile, 'w')
hf.create_dataset("r", data=r_list)
hf.create_dataset("RMSE", data=RMSE_list)
hf.create_dataset("FC", data=corrected, compression="gzip")
hf.create_dataset("roi_names", data=roi_names.astype(np.string_))
for n in range(num_traces):
r = r_vals[n]
if r is not None:
hf.create_dataset("r_vals/%d" % n, data=r)
hf.close()
except:
logging.error("Error creating output h5 file")
raise
roi_traces.close()
neuropil_traces.close()
jout = copy.copy(jin)
jout["neuropil_correction"] = savefile
module.write_output_data(jout)
logging.info("finished")
if __name__ == "__main__": main()