Source code for allensdk.internal.pipeline_modules.gbm.generate_gbm_sample_metadata
###
# This program generates a JSON file containing the GBM sample metadata records returned by an SQL query.
# TODO: This will eventually be done by the strategy; once it is, remove it from here.
import psycopg2
import json
import sys
from psycopg2.extras import RealDictCursor
def main(sample_metadata_json_location, db_host, db_port, db_name, db_user, db_passwd):
    """Query the GBM sample metadata records and write them to a JSON file.

    Connects to a PostgreSQL database through psycopg2, runs one fixed
    query, and dumps every returned row to ``sample_metadata_json_location``
    as a JSON array (``indent=2``). Rows are fetched with ``RealDictCursor``
    and so are serialized as objects keyed by the column aliases used in the
    query: ``rna_well_id``, ``tumor_id``, ``tumor_name``, ``block_id``,
    ``block_name``, ``specimen_id``, ``specimen_name``, ``polygon_id``,
    ``structure_id``, ``structure_abbreviation``, ``structure_color`` and
    ``structure_name``.

    The query only keeps wells whose ``sample_id_string`` matches
    ``'366-___'`` or ``'466-___'`` and whose joined ``well_known_files`` row
    has a non-null ``published_at``; results are ordered by ``rna.id``.

    Parameters
    ----------
    sample_metadata_json_location
        Path of the JSON file to create (opened in ``'w'`` mode, so an
        existing file is overwritten).
    db_host, db_port, db_name, db_user, db_passwd
        Connection settings, passed to ``psycopg2.connect`` as ``host``,
        ``port``, ``dbname``, ``user`` and ``password`` respectively.

    Raises
    ------
    Whatever ``psycopg2`` raises on connection or query failure, and
    whatever ``open`` / ``json.dump`` raise when writing the output file.
    """
    query = (
        "select distinct rna.id as rna_well_id, tumor.id as tumor_id, tumor.external_specimen_name as tumor_name"
        ", block.id as block_id, block.external_specimen_name as block_name, sp.id as specimen_id"
        ", sp.external_specimen_name as specimen_name, min(poly.id) as polygon_id, st.id as structure_id"
        ", st.acronym as structure_abbreviation, to_hex(st.red) || to_hex(st.green) || to_hex(st.blue) as "
        "structure_color, st.name as structure_name from wells rna join image_series mims on mims.id = "
        "rna.image_series_id join specimens sp on sp.id = mims.specimen_id join specimens block on block.id = "
        "sp.parent_id join specimens tumor on tumor.id = block.parent_id join avg_microarray_templates mt on "
        "mt.image_series_id = mims.id join avg_graphic_objects poly on poly.id = mt.shape_id join structures st "
        "on st.id = poly.structure_id join rs_tubes tube on tube.sample_id = rna.id join rna_seq_experiments exp "
        "on exp.rs_tube_id = tube.id join rna_seq_analysis_runs_rna_seq_experiments ar2exp on "
        "ar2exp.rna_seq_experiment_id = exp.id join analysis_runs ar on ar.id = ar2exp.rna_seq_analysis_run_id "
        "join well_known_files fpkm on fpkm.attachable_id = ar.id where rna.sample_id_string like any (array "
        "['366-___', '466-___']) and fpkm.published_at is not null group by tumor.id, "
        "tumor.external_specimen_name, block.id, block.external_specimen_name, sp.id, sp.external_specimen_name, "
        "rna.id, st.id, st.acronym, st.name, structure_color order by rna.id;"
    )

    conn = psycopg2.connect(host=db_host, port=db_port, dbname=db_name, user=db_user, password=db_passwd)
    try:
        cur = conn.cursor(cursor_factory=RealDictCursor)
        try:
            cur.execute(query)
            records = cur.fetchall()
        finally:
            cur.close()
    finally:
        # Always release the database session, including when the query
        # fails. Explicit close() is used because a psycopg2 connection used
        # as a context manager only ends the transaction; it does not close.
        conn.close()

    # The output file is opened only after the rows are in hand, so a failed
    # query does not leave behind an empty or truncated file.
    with open(sample_metadata_json_location, 'w') as outfile:
        json.dump(records, outfile, indent=2)
if __name__ == '__main__':
    # Command-line entry point. Positional arguments, in order:
    #   sample_metadata_json_location db_host db_port db_name db_user db_passwd
    # Any arguments beyond the sixth are ignored.
    if len(sys.argv) < 7:
        # Exit with a usage message on stderr (non-zero status) rather than
        # an IndexError traceback when arguments are missing.
        sys.exit("usage: %s sample_metadata_json_location db_host db_port db_name db_user db_passwd"
                 % sys.argv[0])
    (sample_metadata_json_location, db_host, db_port,
     db_name, db_user, db_passwd) = sys.argv[1:7]
    main(sample_metadata_json_location, db_host, db_port, db_name, db_user, db_passwd)