# Source code for allensdk.internal.pipeline_modules.gbm.generate_gbm_sample_metadata

###
# This program generates a JSON file containing the GBM sample metadata records returned by an SQL query.

# To Do: This will be done by the strategy and later removed from here.

import psycopg2
import json
import sys
from psycopg2.extras import RealDictCursor


def main(sample_metadata_json_location, db_host, db_port, db_name, db_user, db_passwd):
    """Query the GBM sample metadata records and write them to a JSON file.

    Connects to the PostgreSQL database described by the ``db_*`` arguments,
    runs a fixed query, and dumps the result rows as a JSON list (indent=2).
    Rows are fetched through ``RealDictCursor``, so each record is an object
    keyed by the column aliases in the query: ``rna_well_id``, ``tumor_id``,
    ``tumor_name``, ``block_id``, ``block_name``, ``specimen_id``,
    ``specimen_name``, ``polygon_id``, ``structure_id``,
    ``structure_abbreviation``, ``structure_color`` and ``structure_name``.

    Only wells whose ``sample_id_string`` matches ``'366-___'`` or
    ``'466-___'`` and whose joined ``well_known_files`` row has a non-null
    ``published_at`` are returned, ordered by well id.

    Parameters
    ----------
    sample_metadata_json_location : str
        Path of the JSON file to write (overwritten if it exists).
    db_host, db_port, db_name, db_user, db_passwd
        Connection settings forwarded to ``psycopg2.connect`` as ``host``,
        ``port``, ``dbname``, ``user`` and ``password``.

    Returns
    -------
    None

    Raises
    ------
    Whatever ``psycopg2.connect`` / ``cursor.execute`` raise on connection or
    query failure, and whatever ``open`` raises if the output path cannot be
    written.  In the database-failure case the output file is left untouched.
    """
    # NOTE(review): to_hex() does not zero-pad, so a colour channel below 16
    # yields a single hex digit and structure_color may be shorter than six
    # characters.  Left unchanged because consumers may depend on the current
    # output -- confirm before switching to lpad(to_hex(...), 2, '0').
    query = (
        "select distinct rna.id as rna_well_id, tumor.id as tumor_id, tumor.external_specimen_name as tumor_name"
        ", block.id as block_id, block.external_specimen_name as block_name, sp.id as specimen_id"
        ", sp.external_specimen_name as specimen_name, min(poly.id) as polygon_id, st.id as structure_id"
        ", st.acronym as structure_abbreviation, to_hex(st.red) || to_hex(st.green) || to_hex(st.blue) as "
        "structure_color, st.name as structure_name from wells rna join image_series mims on mims.id = "
        "rna.image_series_id join specimens sp on sp.id = mims.specimen_id join specimens block on block.id = "
        "sp.parent_id join specimens tumor on tumor.id = block.parent_id join avg_microarray_templates mt on "
        "mt.image_series_id = mims.id join avg_graphic_objects poly on poly.id = mt.shape_id join structures st "
        "on st.id = poly.structure_id join rs_tubes tube on tube.sample_id = rna.id join rna_seq_experiments exp "
        "on exp.rs_tube_id = tube.id join rna_seq_analysis_runs_rna_seq_experiments ar2exp on "
        "ar2exp.rna_seq_experiment_id = exp.id join analysis_runs ar on ar.id = ar2exp.rna_seq_analysis_run_id "
        "join well_known_files fpkm on fpkm.attachable_id = ar.id where rna.sample_id_string like any (array "
        "['366-___', '466-___']) and fpkm.published_at is not null group by tumor.id, "
        "tumor.external_specimen_name, block.id, block.external_specimen_name, sp.id, sp.external_specimen_name, "
        "rna.id, st.id, st.acronym, st.name, structure_color order by rna.id;"
    )

    conn = psycopg2.connect(host=db_host, port=db_port, dbname=db_name, user=db_user, password=db_passwd)
    # Explicit try/finally rather than ``with conn``: psycopg2's connection
    # context manager only ends the transaction, it does not close the
    # connection.
    try:
        cur = conn.cursor(cursor_factory=RealDictCursor)
        try:
            cur.execute(query)
            rows = cur.fetchall()
        finally:
            cur.close()
    finally:
        conn.close()

    # Open the output only after the rows are in hand so that a database
    # failure cannot leave behind a truncated or empty file.
    with open(sample_metadata_json_location, 'w') as outfile:
        json.dump(rows, outfile, indent=2)
if __name__ == '__main__':
    # Command-line entry point.  The six positional arguments map one-to-one,
    # in order, onto the parameters of main(); any further arguments are
    # ignored.
    if len(sys.argv) < 7:
        # Fail with a readable usage line instead of a bare IndexError.
        # sys.exit(<str>) prints to stderr and exits with status 1.
        sys.exit(
            "usage: {0} SAMPLE_METADATA_JSON_LOCATION DB_HOST DB_PORT "
            "DB_NAME DB_USER DB_PASSWD".format(sys.argv[0])
        )

    sample_metadata_json_location = sys.argv[1]
    db_host = sys.argv[2]
    db_port = sys.argv[3]
    db_name = sys.argv[4]
    db_user = sys.argv[5]
    db_passwd = sys.argv[6]

    main(sample_metadata_json_location, db_host, db_port, db_name, db_user, db_passwd)