Source code for allensdk.internal.pipeline_modules.run_roi_filter

import logging
from six.moves import cPickle
import allensdk.internal.core.lims_utilities as lu
from allensdk.internal.core.lims_pipeline_module import PipelineModule, run_module
from allensdk.internal.brain_observatory import roi_filter, roi_filter_utils
from allensdk.brain_observatory.roi_masks import (RIGHT_SHIFT, LEFT_SHIFT,
                                                  DOWN_SHIFT, UP_SHIFT)
import pandas as pd
import os
import numpy
import h5py

DEPRECATED_MOTION_HEADER = ["index", "x", "y", "a", "b", "c", "d", "e", "f"]
MAX_SHIFT = 30
OVERLAP_THRESHOLD = 0.9
DEBUG_SDK_PATH="/data/informatics/CAM/roi_filter/allensdk/"
DEBUG_SCRIPT=os.path.join(DEBUG_SDK_PATH, "allensdk", "internal",
                          "pipeline_modules", "run_roi_filter.py")
DEBUG_OUTPUT_DIRECTORY="/data/informatics/CAM/roi_filter/"


[docs]def get_motion_filepath(experiment_id): return lu.query(""" select CONCAT(wkf.storage_directory, wkf.filename) as path from well_known_files wkf join well_known_file_types wkft on wkft.id = wkf.well_known_file_type_id join ophys_experiments oe on oe.id = wkf.attachable_id where oe.id = {} and wkft.name like 'OphysMotionXyOffsetData'""".format(experiment_id))[0]["path"]
[docs]def get_segmentation_filepath(experiment_id, file_type): return lu.query(""" select CONCAT(wkf.storage_directory, wkf.filename) as path from well_known_files wkf join well_known_file_types wkft on wkft.id = wkf.well_known_file_type_id join ophys_cell_segmentation_runs ocsr on ocsr.id = wkf.attachable_id join ophys_experiments oe on oe.id = ocsr.ophys_experiment_id where oe.id = {} and wkft.name like '{}' and ocsr.current = 't'""".format(experiment_id, file_type))[0]["path"]
[docs]def get_model_info(experiment_id): res = lu.query(""" select CONCAT(wkf.storage_directory, wkf.filename) as path, wkf.id from ophys_experiments oe join ophys_sessions os on os.id = oe.ophys_session_id join projects p on p.id = os.project_id join well_known_files wkf on wkf.attachable_id = p.id join well_known_file_types wkft on wkft.id = wkf.well_known_file_type_id where oe.id = {} and wkft.name = 'RoiLabelModel'""".format(experiment_id))[0] return res["path"], res["id"]
[docs]def get_genotype_info(experiment_id, code): res = lu.query(""" select g.name from ophys_experiments oe join ophys_sessions os on os.id = oe.ophys_session_id join specimens s on s.id = os.specimen_id join donors d on d.id = s.donor_id join donors_genotypes dg on dg.donor_id = d.id join genotypes g on g.id = dg.genotype_id join genotype_types gt on gt.id = g.genotype_type_id where oe.id = {} and gt.code like '{}'""".format(experiment_id, code)) output = set() for line in res: output.add(line["name"]) return list(output)
[docs]def create_input_data(experiment_id): data = {} data["log_0"] = get_motion_filepath(experiment_id) model, model_id = get_model_info(experiment_id) data["roi_label_model"] = model data["roi_label_model_id"] = model_id data["targeted_structure_id"] = lu.query(""" select targeted_structure_id from ophys_experiments oe where oe.id = {}""".format(experiment_id))[0]["targeted_structure_id"] data["imaging_depth"] = lu.query(""" select calculated_depth from ophys_experiments oe where oe.id = {}""".format(experiment_id))[0]["calculated_depth"] data["drivers"] = get_genotype_info(experiment_id, "D") data["reporters"] = get_genotype_info(experiment_id, "R") data["max_int_file"] = get_segmentation_filepath( experiment_id, "OphysSegmentationMaskData") data["object_list"] = get_segmentation_filepath( experiment_id, "OphysSegmentationObjects") return data
[docs]def debug(experiment_id, local=False, sdk_path=DEBUG_SDK_PATH, script=DEBUG_SCRIPT, output_directory=DEBUG_OUTPUT_DIRECTORY): input_data = create_input_data(experiment_id) exp_dir = os.path.join(output_directory, str(experiment_id)) run_module(script, input_data, exp_dir, sdk_path=sdk_path, local=local)
[docs]def load_object_list(filename): '''Load the object list file.''' dataframe = pd.read_csv(filename) dataframe.columns = [column.strip() for column in dataframe.columns] return dataframe
[docs]def is_deprecated_motion_file(filename): '''Check if a file is an old style motion correction file. By agreement, new-style files will always have a header and that header will always contain at least 1 alpha character. ''' with open(filename, "r") as f: return not any([c.isalpha() for c in f.readline()])
[docs]def load_rigid_motion_transform(filename): '''Load the rigid motion transform file.''' if is_deprecated_motion_file(filename): return pd.read_csv(filename, header=None, names=DEPRECATED_MOTION_HEADER) else: return pd.read_csv(filename)
[docs]def load_all_input(data): '''Load all input data from the input json.''' try: object_list_file = data["object_list"] object_data = load_object_list(object_list_file) except KeyError: logging.error("Input json missing object_list") raise except IOError: logging.error("Could not read object list file %s", object_list_file) raise try: rigid_motion_transform_file = data["log_0"] # TODO: update name in LIMS and here motion_data = load_rigid_motion_transform(rigid_motion_transform_file) except KeyError: logging.error("Input json missing log_0") # TODO: update name in LIMS and here raise except IOError: logging.error("Could not read rigid motion transform file %s", rigid_motion_transform_file) raise try: maxint_file = data["max_int_file"] with h5py.File(maxint_file, "r") as f: segmentation_stack = f["data"][...] except KeyError: logging.error("Input json missing max_int_file") raise except IOError: logging.error("Could not read max_int_file file %s", maxint_file) raise try: model_file = data["roi_label_model"] classifier = roi_filter.ROIClassifier.from_file(model_file) except KeyError: logging.error("Input json missing roi_label_model") raise except IOError: logging.error("Could not read roi_label_model file %s", model_file) raise try: depth = float(data["imaging_depth"]) except KeyError: logging.error("Input json missing imaging_depth") raise except ValueError: logging.error("Invalid depth %s", data["imaging_depth"]) raise try: structure_id = str(data["targeted_structure_id"]) except KeyError: logging.error("Input json missing targeted_structure_id") raise try: model_id = data["roi_label_model_id"] except KeyError: logging.error("Input json missing roi_label_model_id") raise try: drivers = data["drivers"] except KeyError: logging.error("Input json drivers") raise try: reporters = data["reporters"] except KeyError: logging.error("Input json missing reporters") raise border = roi_filter_utils.calculate_max_border(motion_data, MAX_SHIFT) rois = roi_filter_utils.get_rois(segmentation_stack, border) rois = roi_filter_utils.order_rois_by_object_list(object_data, rois) result = {"model_id": model_id, "classifier": classifier, "object_data": object_data, "depth": depth, "structure_id": structure_id, "drivers": drivers, "reporters": reporters, "border": border, "rois": rois} return result
[docs]def create_output_data(rois, model_id, border, excluded, unexpected_features): data = {} data["motion_border"] = {"x0": border[RIGHT_SHIFT], "y0": border[DOWN_SHIFT], "x1": border[LEFT_SHIFT], "y1": border[UP_SHIFT]} data["roi_label_model_id"] = model_id data["unexpected_features"] = unexpected_features if rois: data["image"] = {"width": rois[0].img_cols, "height": rois[0].img_rows} json_rois = {} for i, roi in enumerate(rois): json_roi = {} json_roi["x"] = roi.x json_roi["y"] = roi.y json_roi["width"] = roi.width json_roi["height"] = roi.height json_roi["mask"] = roi.mask json_roi["mask_page"] = roi.mask_group json_roi["exclusion_labels"] = roi.labels if roi.labels: json_roi["valid"] = False else: json_roi["valid"] = True # for backwards compatibility json_roi["exclude_code"] = excluded[i] json_rois[roi.label] = json_roi data["rois"] = json_rois return data
[docs]def main(): mod = PipelineModule("Filter Ophys ROIs produced from cell segmentation.") input_data = mod.input_data() data = load_all_input(input_data) model_id = data["model_id"] classifier = data["classifier"] object_data = data["object_data"] depth = data["depth"] structure_id = data["structure_id"] drivers = data["drivers"] reporters = data["reporters"] border = data["border"] rois = data["rois"] label_array = classifier.get_labels(object_data, depth, structure_id, drivers, reporters) rois = roi_filter.apply_labels(rois, label_array, classifier.label_names) rois = roi_filter.label_unions_and_duplicates(rois, OVERLAP_THRESHOLD) output_data = create_output_data(rois, model_id, border, object_data["eXcluded"], classifier.unexpected_features) mod.write_output_data(output_data)
if __name__ == "__main__": main()