Source code for allensdk.internal.pipeline_modules.run_roi_filter

import logging
from six.moves import cPickle
import allensdk.internal.core.lims_utilities as lu
from allensdk.internal.core.lims_pipeline_module import PipelineModule, run_module
from allensdk.internal.brain_observatory import roi_filter, roi_filter_utils
from allensdk.brain_observatory.roi_masks import (RIGHT_SHIFT, LEFT_SHIFT,
                                                  DOWN_SHIFT, UP_SHIFT)
import pandas as pd
import os
import numpy
import h5py

DEPRECATED_MOTION_HEADER = ["index", "x", "y", "a", "b", "c", "d", "e", "f"]
MAX_SHIFT = 30
OVERLAP_THRESHOLD = 0.9
DEBUG_SDK_PATH="/data/informatics/CAM/roi_filter/allensdk/"
DEBUG_SCRIPT=os.path.join(DEBUG_SDK_PATH, "allensdk", "internal",
                          "pipeline_modules", "run_roi_filter.py")
DEBUG_OUTPUT_DIRECTORY="/data/informatics/CAM/roi_filter/"


[docs]def get_motion_filepath(experiment_id):
    return lu.query("""
select CONCAT(wkf.storage_directory, wkf.filename) as path
from well_known_files wkf
join well_known_file_types wkft on wkft.id = wkf.well_known_file_type_id
join ophys_experiments oe on oe.id = wkf.attachable_id
where oe.id = {} and
wkft.name like 'OphysMotionXyOffsetData'""".format(experiment_id))[0]["path"]


[docs]def get_segmentation_filepath(experiment_id, file_type):
    return lu.query("""
select CONCAT(wkf.storage_directory, wkf.filename) as path
from well_known_files wkf
join well_known_file_types wkft on wkft.id = wkf.well_known_file_type_id
join ophys_cell_segmentation_runs ocsr on ocsr.id = wkf.attachable_id
join ophys_experiments oe on oe.id = ocsr.ophys_experiment_id
where oe.id = {} and wkft.name like '{}'
and ocsr.current = 't'""".format(experiment_id, file_type))[0]["path"]


[docs]def get_model_info(experiment_id):
    res = lu.query("""
select CONCAT(wkf.storage_directory, wkf.filename) as path, wkf.id
from ophys_experiments oe
join ophys_sessions os on os.id = oe.ophys_session_id
join projects p on p.id = os.project_id
join well_known_files wkf on wkf.attachable_id = p.id
join well_known_file_types wkft on wkft.id = wkf.well_known_file_type_id
where oe.id = {} and
wkft.name = 'RoiLabelModel'""".format(experiment_id))[0]
    return res["path"], res["id"]


[docs]def get_genotype_info(experiment_id, code):
    res = lu.query("""
select g.name
from ophys_experiments oe
join ophys_sessions os on os.id = oe.ophys_session_id
join specimens s on s.id = os.specimen_id
join donors d on d.id = s.donor_id
join donors_genotypes dg on dg.donor_id = d.id
join genotypes g on g.id = dg.genotype_id
join genotype_types gt on gt.id = g.genotype_type_id
where oe.id = {} and gt.code like '{}'""".format(experiment_id, code))
    output = set()
    for line in res:
        output.add(line["name"])
    return list(output)


[docs]def create_input_data(experiment_id):
    data = {}
    data["log_0"] = get_motion_filepath(experiment_id)
    model, model_id = get_model_info(experiment_id)
    data["roi_label_model"] = model
    data["roi_label_model_id"] = model_id
    data["targeted_structure_id"] = lu.query("""
select targeted_structure_id
from ophys_experiments oe
where oe.id = {}""".format(experiment_id))[0]["targeted_structure_id"]
    data["imaging_depth"] = lu.query("""
select calculated_depth
from ophys_experiments oe
where oe.id = {}""".format(experiment_id))[0]["calculated_depth"]
    data["drivers"] = get_genotype_info(experiment_id, "D")
    data["reporters"] = get_genotype_info(experiment_id, "R")
    data["max_int_file"] = get_segmentation_filepath(
        experiment_id, "OphysSegmentationMaskData")
    data["object_list"] = get_segmentation_filepath(
        experiment_id, "OphysSegmentationObjects")
    return data


[docs]def debug(experiment_id, local=False, sdk_path=DEBUG_SDK_PATH,
          script=DEBUG_SCRIPT, output_directory=DEBUG_OUTPUT_DIRECTORY):
    input_data = create_input_data(experiment_id)
    exp_dir = os.path.join(output_directory, str(experiment_id))
    run_module(script,
               input_data,
               exp_dir,
               sdk_path=sdk_path,
               local=local)


[docs]def load_object_list(filename):
    '''Load the object list file.'''
    dataframe = pd.read_csv(filename)
    dataframe.columns = [column.strip() for column in dataframe.columns]
    return dataframe


[docs]def is_deprecated_motion_file(filename):
    '''Check if a file is an old style motion correction file.

    By agreement, new-style files will always have a header and that
    header will always contain at least 1 alpha character.
    '''
    with open(filename, "r") as f:
        return not any([c.isalpha() for c in f.readline()])


[docs]def load_rigid_motion_transform(filename):
    '''Load the rigid motion transform file.'''
    if is_deprecated_motion_file(filename):
        return pd.read_csv(filename, header=None,
                           names=DEPRECATED_MOTION_HEADER)
    else:
        return pd.read_csv(filename)


[docs]def load_all_input(data):
    '''Load all input data from the input json.'''
    try:
        object_list_file = data["object_list"]
        object_data = load_object_list(object_list_file)
    except KeyError:
        logging.error("Input json missing object_list")
        raise
    except IOError:
        logging.error("Could not read object list file %s", object_list_file)
        raise

    try:
        rigid_motion_transform_file = data["log_0"]  # TODO: update name in LIMS and here
        motion_data = load_rigid_motion_transform(rigid_motion_transform_file)
    except KeyError:
        logging.error("Input json missing log_0")  # TODO: update name in LIMS and here
        raise
    except IOError:
        logging.error("Could not read rigid motion transform file %s",
                      rigid_motion_transform_file)
        raise

    try:
        maxint_file = data["max_int_file"]
        with h5py.File(maxint_file, "r") as f:
            segmentation_stack = f["data"][...]
    except KeyError:
        logging.error("Input json missing max_int_file")
        raise
    except IOError:
        logging.error("Could not read max_int_file file %s", maxint_file)
        raise

    try:
        model_file = data["roi_label_model"]
        classifier = roi_filter.ROIClassifier.from_file(model_file)
    except KeyError:
        logging.error("Input json missing roi_label_model")
        raise
    except IOError:
        logging.error("Could not read roi_label_model file %s", model_file)
        raise

    try:
        depth = float(data["imaging_depth"])
    except KeyError:
        logging.error("Input json missing imaging_depth")
        raise
    except ValueError:
        logging.error("Invalid depth %s", data["imaging_depth"])
        raise

    try:
        structure_id = str(data["targeted_structure_id"])
    except KeyError:
        logging.error("Input json missing targeted_structure_id")
        raise

    try:
        model_id = data["roi_label_model_id"]
    except KeyError:
        logging.error("Input json missing roi_label_model_id")
        raise

    try:
        drivers = data["drivers"]
    except KeyError:
        logging.error("Input json drivers")
        raise

    try:
        reporters = data["reporters"]
    except KeyError:
        logging.error("Input json missing reporters")
        raise

    border = roi_filter_utils.calculate_max_border(motion_data, MAX_SHIFT)
    rois = roi_filter_utils.get_rois(segmentation_stack, border)
    rois = roi_filter_utils.order_rois_by_object_list(object_data, rois)

    result = {"model_id": model_id,
              "classifier": classifier,
              "object_data": object_data,
              "depth": depth,
              "structure_id": structure_id,
              "drivers": drivers,
              "reporters": reporters,
              "border": border,
              "rois": rois}
    return result


[docs]def create_output_data(rois, model_id, border, excluded,
                       unexpected_features):
    data = {}
    data["motion_border"] = {"x0": border[RIGHT_SHIFT],
                             "y0": border[DOWN_SHIFT],
                             "x1": border[LEFT_SHIFT],
                             "y1": border[UP_SHIFT]}
    data["roi_label_model_id"] = model_id
    data["unexpected_features"] = unexpected_features
    if rois:
        data["image"] = {"width": rois[0].img_cols,
                         "height": rois[0].img_rows}
    json_rois = {}
    for i, roi in enumerate(rois):
        json_roi = {}
        json_roi["x"] = roi.x
        json_roi["y"] = roi.y
        json_roi["width"] = roi.width
        json_roi["height"] = roi.height
        json_roi["mask"] = roi.mask
        json_roi["mask_page"] = roi.mask_group
        json_roi["exclusion_labels"] = roi.labels
        if roi.labels:
            json_roi["valid"] = False
        else:
            json_roi["valid"] = True
        # for backwards compatibility
        json_roi["exclude_code"] = excluded[i]
        json_rois[roi.label] = json_roi
    data["rois"] = json_rois
    return data


[docs]def main():
    mod = PipelineModule("Filter Ophys ROIs produced from cell segmentation.")

    input_data = mod.input_data()
    data = load_all_input(input_data)
    model_id = data["model_id"]
    classifier = data["classifier"]
    object_data = data["object_data"]
    depth = data["depth"]
    structure_id = data["structure_id"]
    drivers = data["drivers"]
    reporters = data["reporters"]
    border = data["border"]
    rois = data["rois"]

    label_array = classifier.get_labels(object_data, depth, structure_id,
                                        drivers, reporters)
    rois = roi_filter.apply_labels(rois, label_array, classifier.label_names)

    rois = roi_filter.label_unions_and_duplicates(rois, OVERLAP_THRESHOLD)

    output_data = create_output_data(rois, model_id, border,
                                     object_data["eXcluded"],
                                     classifier.unexpected_features)

    mod.write_output_data(output_data)

if __name__ == "__main__": main()
Source code for allensdk.internal.pipeline_modules.run_roi_filter

Contents

Questions