#!/usr/bin/python
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
import logging
import numpy as np
from allensdk.brain_observatory.r_neuropil import estimate_contamination_ratios
import allensdk.internal.core.lims_utilities as lu
import h5py
import json
import copy
import os
import sys
import argparse
import shutil

from allensdk.internal.core.lims_pipeline_module import PipelineModule, run_module

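# Example input JSON for this module -- a minimal sketch with hypothetical
# paths; the keys mirror the input_data dictionary built in debug() below:
#
#   {
#       "roi_trace_file": "/path/to/demixed_traces.h5",
#       "neuropil_trace_file": "/path/to/neuropil_traces.h5",
#       "storage_directory": "/path/to/output_dir"
#   }
#
# On success the module writes neuropil_correction.h5 (datasets "r", "RMSE",
# "FC", "roi_names", and per-trace "r_vals/<n>") into storage_directory and
# echoes the input JSON back with an added "neuropil_correction" key.
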
def debug(experiment_id, local=False):
    OUTPUT_DIRECTORY = "/data/informatics/CAM/neuropil"
    SDK_PATH = "/data/informatics/CAM/neuropil/allensdk"
    SCRIPT = ("/data/informatics/CAM/neuropil/allensdk/allensdk/"
              "internal/pipeline_modules/run_neuropil_correction.py")

    exp = lu.query("select * from ophys_experiments where id = %d" % experiment_id)[0]
    sd = exp["storage_directory"]

    # prefer a locally staged demixed trace file if one exists
    test_file = "/data/informatics/CAM/demix/%d/demixed_traces.h5" % experiment_id
    if os.path.exists(test_file):
        roi_trace_file = test_file
    else:
        roi_trace_file = os.path.join(sd, "demix",
                                      "%d_demixed_traces.h5" % experiment_id)

    exp_dir = os.path.join(OUTPUT_DIRECTORY, str(experiment_id))

    input_data = dict(
        roi_trace_file=roi_trace_file,
        neuropil_trace_file=os.path.join(sd, "processed", "neuropil_traces.h5"),
        storage_directory=exp_dir)

    run_module(SCRIPT,
               input_data,
               exp_dir,
               sdk_path=SDK_PATH,
               pbs=dict(vmem=160,
                        job_name="np_%d" % experiment_id,
                        walltime="36:00:00"),
               local=local,
               optional_args=['--log-level', 'DEBUG'])

def debug_plot(file_name, roi_trace, neuropil_trace, corrected_trace, r,
               r_vals=None, err_vals=None):
    fig = plt.figure(figsize=(15, 10))

    # top panel: raw, corrected, and neuropil traces
    ax = fig.add_subplot(211)
    ax.plot(roi_trace, 'r', label="raw")
    ax.plot(corrected_trace, 'b', label="fc")
    ax.plot(neuropil_trace, 'g', label="neuropil")
    ax.set_xlim(0, roi_trace.size)
    ax.set_title('raw(%.02f, %.02f) fc(%.02f, %.02f) r(%f)' % (
        roi_trace.min(), roi_trace.max(),
        corrected_trace.min(), corrected_trace.max(), r))
    ax.legend()

    # bottom panel: error as a function of candidate r values
    if r_vals is not None:
        ax = fig.add_subplot(212)
        ax.plot(r_vals, err_vals, "o")

    plt.savefig(file_name)
    plt.close()

def adjust_r_for_negativity(r, F_C, F_M, F_N):
    # this function is no longer used, but leaving it here just in case

    # loop through all of the negative spots and pick r to fix them
    neg_is = np.argwhere(F_C < 0)
    if neg_is.size > 0:
        logging.debug("Correcting for negative trace, starting with r = %f", r)

        for i in neg_is:
            if F_C[i] >= 0:
                continue

            # r_new = (F_C[i] + r_old * F_N[i]) / F_N[i]
            r = F_M[i] / F_N[i]
            F_C = F_M - r * F_N
            logging.debug("  updated r to %f", r)

        # if there is still a negative spot, it's off by some tiny epsilon.
        # step r down by delta_r increments until we find one that works.
        delta_r = -1e-5
        while F_C.min() < 0 and r >= 0.0:
            r += delta_r
            F_C = F_M - r * F_N
            logging.debug("  stepped r to %f", r)

        logging.debug("  finished with r = %f", r)

    return r

def main():
    module = PipelineModule()
    args = module.args

    jin = module.input_data()

    ########################################################################
    # prelude -- get processing metadata

    trace_file = jin["roi_trace_file"]
    neuropil_file = jin["neuropil_trace_file"]
    storage_dir = jin["storage_directory"]

    plot_dir = os.path.join(storage_dir, "neuropil_subtraction_plots")

    if os.path.exists(plot_dir):
        shutil.rmtree(plot_dir)

    try:
        os.makedirs(plot_dir)
    except:
        pass

    logging.info("Neuropil correcting '%s'", trace_file)

    ########################################################################
    # process data

    try:
        roi_traces = h5py.File(trace_file, "r")
    except:
        logging.error("Error: unable to open ROI trace file '%s'", trace_file)
        raise

    try:
        neuropil_traces = h5py.File(neuropil_file, "r")
    except:
        logging.error("Error: unable to open neuropil trace file '%s'",
                      neuropil_file)
        raise

    # get number of traces, length, etc.
    num_traces, T = roi_traces['data'].shape
    T_orig = T
    T_cross_val = int(T / 2)
    if T - T_cross_val > T_cross_val:
        T = T - 1

    # make sure that ROI and neuropil trace files are organized the same
    n_id = neuropil_traces["roi_names"][:].astype(str)
    r_id = roi_traces["roi_names"][:].astype(str)
    logging.info("Processing %d traces", len(n_id))
    assert len(n_id) == len(r_id), "Input trace files are not aligned (ROI count)"
    for i in range(len(n_id)):
        assert n_id[i] == r_id[i], "Input trace files are not aligned (ROI IDs)"

    # initialize storage variables and analysis routine
    r_list = [None] * num_traces
    RMSE_list = [-1] * num_traces
    roi_names = n_id
    corrected = np.zeros((num_traces, T_orig))
    r_vals = [None] * num_traces

    for n in range(num_traces):
        roi = roi_traces['data'][n]
        neuropil = neuropil_traces['data'][n]

        if np.any(np.isnan(neuropil)):
            logging.warning("neuropil trace for roi %d contains NaNs, skipping", n)
            continue

        if np.any(np.isnan(roi)):
            logging.warning("roi trace for roi %d contains NaNs, skipping", n)
            continue

        r = None

        logging.info("Correcting trace %d (roi %s)", n, str(n_id[n]))
        results = estimate_contamination_ratios(roi, neuropil)
        logging.info("r=%f err=%f it=%d", results["r"], results["err"],
                     results["it"])

        r = results["r"]
        fc = roi - r * neuropil
        RMSE_list[n] = results["err"]
        r_vals[n] = results["r_vals"]

        debug_plot(os.path.join(plot_dir, "initial_%04d.png" % n),
                   roi, neuropil, fc, r,
                   results["r_vals"], results["err_vals"])

        # mean of the corrected trace must be positive
        if fc.mean() > 0:
            r_list[n] = r
            corrected[n, :] = fc
        else:
            logging.warning("fc has negative baseline, skipping this r value")

    # compute mean valid r value
    r_mean = np.array([r for r in r_list if r is not None]).mean()

    # fill in empty r values
    for n in range(num_traces):
        roi = roi_traces['data'][n]
        neuropil = neuropil_traces['data'][n]

        if r_list[n] is None:
            logging.warning("Error estimating r for trace %d. Setting to zero.", n)
            r_list[n] = 0
            corrected[n, :] = roi

        # save a debug plot
        debug_plot(os.path.join(plot_dir, "final_%04d.png" % n),
                   roi, neuropil, corrected[n, :], r_list[n])

        # one last sanity check
        eps = -0.0001
        if np.mean(corrected[n, :]) < eps:
            raise Exception("Trace %d baseline is still negative after correction" % n)

        if r_list[n] < 0.0:
            raise Exception("Trace %d ended with negative r" % n)

    ########################################################################
    # write out processed data

    try:
        savefile = os.path.join(storage_dir, "neuropil_correction.h5")
        hf = h5py.File(savefile, 'w')
        hf.create_dataset("r", data=r_list)
        hf.create_dataset("RMSE", data=RMSE_list)
        hf.create_dataset("FC", data=corrected, compression="gzip")
        hf.create_dataset("roi_names", data=roi_names.astype(np.string_))

        for n in range(num_traces):
            r = r_vals[n]
            if r is not None:
                hf.create_dataset("r_vals/%d" % n, data=r)
        hf.close()
    except:
        logging.error("Error creating output h5 file")
        raise

    roi_traces.close()
    neuropil_traces.close()

    jout = copy.copy(jin)
    jout["neuropil_correction"] = savefile
    module.write_output_data(jout)

    logging.info("finished")

if __name__ == "__main__":
    main()
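
# A minimal sketch of invoking this module from the command line, assuming the
# usual PipelineModule convention of positional input/output JSON paths (the
# exact arguments are an assumption; only --log-level is shown in debug() above):
#
#   python run_neuropil_correction.py input.json output.json --log-level DEBUG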