aers_rewrite/commit
cleanup of ascii rewrite
author | CWI eculture 2 |
---|---|
Wed Jan 30 16:08:03 2013 +0100 | |
committer | CWI eculture 2 |
Wed Jan 30 16:08:03 2013 +0100 | |
commit | ef562928093a645c5f36e0115e6d1a739a0f2ee9 |
tree | 77c4922464ba586b70bb5b936d3e74e9a7c96c60 |
parent | 4df0408dcc1dfeb9608b16c4338c928a1b3edf3d |
Diff style: patch stat
diff --git a/lib/ascii_rewrite.pl b/lib/ascii_rewrite.pl index fab498b..4b75115 100644 --- a/lib/ascii_rewrite.pl +++ b/lib/ascii_rewrite.pl @@ -1,5 +1,6 @@ :- module(ascii_rewrite, - [convert_ascii/0 + [convert_ascii/0, + convert_quarter/2 ]). :- use_module(library(csvrdf)). @@ -13,33 +14,59 @@ table_map/3. user:file_search_path(ascii, '../fda_data/ascii'). +user:file_search_path(ascii_rdf, '../rdf/aers_ascii'). -filename('Report', 'DEMO04Q1.TXT'). -filename('Drug', 'DRUG04Q1.TXT'). -filename('Reaction', 'REAC04Q1.TXT'). -filename('Outcome', 'OUTC04Q1.TXT'). -filename('Source', 'RPSR04Q1.TXT'). -filename('Therapy', 'THER04Q1.TXT'). -filename('Indication', 'INDI04Q1.TXT'). +filename('Report', 'DEMO'). +filename('Drug', 'DRUG'). +filename('Reaction', 'REAC'). +filename('Outcome', 'OUTC'). +filename('Source', 'RPSR'). +filename('Therapy', 'THER'). +filename('Indication', 'INDI'). :- debug(csvrdf). + +quarter('04Q1'). +quarter('04Q2'). +quarter('04Q3'). +quarter('04Q4'). + convert_ascii :- + ( quarter(Q), + atom_concat(aers, Q, Graph), + atom_concat(Graph, '.ttl', RDF_FileName), + absolute_file_name(ascii_rdf(RDF_FileName), RDF_File), + convert_quarter(Q, Graph), + rdf_save_turtle(RDF_File, [graph(Graph)]), + rdf_retractall(_,_,_,Graph), + fail + ; true + ). + +convert_quarter(Quarter, Graph) :- + ascii_to_rdf(Quarter, Graph), + rewrite_graph(Graph). + +ascii_to_rdf(Quarter, Graph) :- rdf_current_ns(aers, Prefix), - ( filename(Name, FileName), + FileExt = '.TXT', + ( filename(Class, FilePrefix), + concat_atom([FilePrefix,Quarter,FileExt], FileName), absolute_file_name(ascii(FileName), File), + debug(csvrdf, 'convert ~w to ~w', [FileName, Graph]), load_csv_as_rdf(File, [prefix(Prefix), - class(Name), - graph(Name), + class(Class), + graph(Graph), separator(0'$), match_arity(false) ]), - rewrite_graph(Name), fail ; true ). rewrite_graph(Graph) :- + debug(csvrdf, 'rewrite ~w', [Graph]), rdf_rewrite(Graph). @@ -47,42 +74,130 @@ rewrite_graph(Graph) :- % report % -assign_drug_uris @@ +assign_report_uris @@ {S, rdf:type, aers:'Report'}, {S, aers:isr, Id}\ {S} <=> -literal_to_id(Id, aers_r, URI), +literal_n_to_id(Id, aers_r, URI), {URI}. +report_image @@ +{_, aers:image, _} +<=> +true. + +report_followup @@ +{S, aers:i_f_cod, literal(C)} +<=> +report_followup_uri(C, URI), +{S, aers:followup_status, URI}. + +report_code @@ +{S, aers:rept_cod, literal(C)} +<=> +report_type_uri(C, URI), +{S, aers:report_type, URI}. + +report_occp_code @@ +{S, aers:occp_cod, literal(C)} +<=> +report_occ_uri(C, URI), +{S, aers:reporter_type, URI}. + +patient @@ +{S, rdf:type, aers:'Report'}\ +true +<=> +rdf_bnode(P), +{S, aers:patient, P}, +{P, rdf:type, aers:'Patient'}. + +patient_gender @@ +{S, aers:patient, P}\ +{S, aers:gndr_cod, literal(GND)} +<=> +patient_gender_uri(GND, Gender), +{P, aers:gender, Gender}. + +patient_age @@ +{S, aers:patient, P}\ +{S, aers:age, A}, +{S, aers:age_cod, literal(AC)}? +<=> +patient_age_uri(AC, A_URI)?, +{P, aers:age, A}, +{P, aers:age_type, A_URI}. + +patient_weight @@ +{S, aers:patient, P}\ +{S, aers:wt, W}, +{S, aers:wt_cod, literal(WC)}? +<=> +patient_weight_uri(WC, W_URI)?, +{P, aers:weight, W}, +{P, aers:weight_type, W_URI}. + +patient_death @@ +{S, aers:patient, P}\ +{S, aers:death_dt, D} +<=> +{P, aers:death_dt, D}. + % drug % % URIs of the drugs on the DRUG_SEQ -assign_report_uris @@ +assign_drug_uris @@ +{S, rdf:type, aers:'Drug'}, {S, aers:drug_seq, Id}\ {S} <=> -literal_to_id(Id, aers_d, URI), +literal_n_to_id(Id, aers_d, URI), {URI}. link_drug_report @@ {S, rdf:type, aers:'Drug'}\ -{S, aers:isr, literal(ISR)} +{S, aers:isr, ISR} <=> -literal_to_id(ISR, aers_r, R), +literal_n_to_id(ISR, aers_r, R), {R, aers:drug, S}. +drug_role @@ +{S, rdf:type, aers:'Drug'}\ +{S, aers:role_cod, literal(Code)} +<=> +drug_role_uri(Code, URI), +{S, aers:role, URI}. + +drug_verbatim @@ +{S, aers:val_vbm, literal(Code)} +<=> +drug_name_type_uri(Code, URI), +{S, aers:name_type, URI}. + +drug_dechal @@ +{S, aers:dechal, literal(Code)} +<=> +drug_chal_uri(Code, URI), +{S, aers:dechal, URI}. + +drug_rechal @@ +{S, aers:dechal, literal(Code)} +<=> +drug_chal_uri(Code, URI), +{S, aers:dechal, URI}. + % reaction link_reaction_report @@ {S, rdf:type, aers:'Reaction'}, -{S, aers:isr, literal(ISR)}, +{S, aers:isr, ISR}, {S, aers:pt, Term} <=> -literal_to_id(ISR, aers_r, R), +literal_n_to_id(ISR, aers_r, R), {R, aers:reaction, Term}. @@ -90,10 +205,10 @@ literal_to_id(ISR, aers_r, R), link_outcome_report @@ {S, rdf:type, aers:'Outcome'}, -{S, aers:isr, literal(ISR)}, +{S, aers:isr, ISR}, {S, aers:outc_cod, literal(Code)} <=> -literal_to_id(ISR, aers_r, R), +literal_n_to_id(ISR, aers_r, R), outcome_uri(Code, URI), {R, aers:outcome, URI}. @@ -102,10 +217,10 @@ outcome_uri(Code, URI), link_source_report @@ {S, rdf:type, aers:'Source'}, -{S, aers:isr, literal(ISR)}, +{S, aers:isr, ISR}, {S, aers:rpsr_cod, literal(Code)} <=> -literal_to_id(ISR, aers_r, R), +literal_n_to_id(ISR, aers_r, R), source_uri(Code, URI), {R, aers:source, URI}. @@ -115,10 +230,9 @@ source_uri(Code, URI), link_therapy_drug @@ {S, rdf:type, aers:'Therapy'}\ {S, aers:isr, _}, -{S, aers:drug_seq, literal(DRUG_SEQ)} +{S, aers:drug_seq, DRUG_SEQ} <=> -literal_to_id(DRUG_SEQ, aers_d, Drug), -{Drug, aers:therapy, S}. +literal_n_to_id(DRUG_SEQ, aers_d, S). % Indications @@ -126,13 +240,57 @@ literal_to_id(DRUG_SEQ, aers_d, Drug), link_indication_drug @@ {S, rdf:type, aers:'Indication'}, {S, aers:isr, _}, -{S, aers:drug_seq, literal(DRUG_SEQ)}, +{S, aers:drug_seq, DRUG_SEQ}, {S, aers:indi_pt, Term} <=> -literal_to_id(DRUG_SEQ, aers_d, Drug), +literal_n_to_id(DRUG_SEQ, aers_d, Drug), {Drug, aers:indication, Term}. +report_followup_uri('I', C) :- !, rdf_equal(C, aers:'report/initial'). +report_followup_uri('F', C) :- !, rdf_equal(C, aers:'report/followup'). + +report_type_uri('EXP', C) :- !, rdf_equal(C, aers:'report/expedited'). +report_type_uri('PER', C) :- !, rdf_equal(C, aers:'report/periodic'). +report_type_uri('DIR', C) :- !, rdf_equal(C, aers:'report/direct'). + +report_occ_uri('MD', C) :- !, rdf_equal(C, aers:'reporter/physician'). +report_occ_uri('PH', C) :- !, rdf_equal(C, aers:'reporter/pharmacist'). +report_occ_uri('OT', C) :- !, rdf_equal(C, aers:'reporter/health_professional'). +report_occ_uri('LW', C) :- !, rdf_equal(C, aers:'reporter/lawyer'). +report_occ_uri('CN', C) :- !, rdf_equal(C, aers:'reporter/consumer'). + +patient_gender_uri(V, V) :- var(V), !. +patient_gender_uri('UNK', C) :- !, rdf_equal(C, aers:'unknown'). +patient_gender_uri('M', C) :- !, rdf_equal(C, aers:'gender/male'). +patient_gender_uri('F', C) :- !, rdf_equal(C, aers:'gender/female'). +patient_gender_uri('NS', C) :- !, rdf_equal(C, aers:'not_specified'). + +patient_weight_uri(V, V) :- var(V), !. +patient_weight_uri('KG', C) :- !, rdf_equal(C, aers:'weight/kg'). +patient_weight_uri('LBS', C) :- !, rdf_equal(C, aers:'weight/lbs'). +patient_weight_uri('GMS', C) :- !, rdf_equal(C, aers:'weight/gms'). + +patient_age_uri(V, V) :- var(V), !. +patient_age_uri('Dec', C) :- !, rdf_equal(C, aers:'duration/decade'). +patient_age_uri('YR', C) :- !, rdf_equal(C, aers:'duration/year'). +patient_age_uri('MON', C) :- !, rdf_equal(C, aers:'duration/month'). +patient_age_uri('WK', C) :- !, rdf_equal(C, aers:'duration/week'). +patient_age_uri('DY', C) :- !, rdf_equal(C, aers:'duration/day'). +patient_age_uri('HR', C) :- !, rdf_equal(C, aers:'duration/hour'). + +drug_role_uri('PS', C) :- !, rdf_equal(C, aers:'drug/primary_suspect'). +drug_role_uri('SS', C) :- !, rdf_equal(C, aers:'drug/secondary_suspect'). +drug_role_uri('C', C) :- !, rdf_equal(C, aers:'drug/concomitant'). +drug_role_uri('I', C) :- !, rdf_equal(C, aers:'drug/interacting'). + +drug_name_type_uri('1', C) :- !, rdf_equal(C, aers:'drug/tradename'). +drug_name_type_uri('2', C) :- !, rdf_equal(C, aers:'drug/verbatim'). + +drug_chal_uri('Y', C) :- !, rdf_equal(C, aers:'drug/positive'). +drug_chal_uri('N', C) :- !, rdf_equal(C, aers:'drug/negative'). +drug_chal_uri('U', C) :- !, rdf_equal(C, aers:'unknown'). +drug_chal_uri('D', C) :- !, rdf_equal(C, aers:'does_not_apply'). outcome_uri('DE', C) :- !, rdf_equal(C, aers:'outcome/death'). outcome_uri('LT', C) :- !, rdf_equal(C, aers:'outcome/life_threatening'). @@ -140,15 +298,24 @@ outcome_uri('HO', C) :- !, rdf_equal(C, aers:'outcome/hospitalization'). outcome_uri('DS', C) :- !, rdf_equal(C, aers:'outcome/disability'). outcome_uri('CA', C) :- !, rdf_equal(C, aers:'outcome/congenital_anomaly'). outcome_uri('RI', C) :- !, rdf_equal(C, aers:'outcome/required_intervention'). -outcome_uri('OT', C) :- !, rdf_equal(C, aers:'outcome/other'). - - -outcome_uri('FGN', C) :- !, rdf_equal(C, aers:'source/foreign'). -outcome_uri('SDY', C) :- !, rdf_equal(C, aers:'source/study'). -outcome_uri('LIT', C) :- !, rdf_equal(C, aers:'source/literature'). -outcome_uri('CSM', C) :- !, rdf_equal(C, aers:'source/consumer'). -outcome_uri('HP', C) :- !, rdf_equal(C, aers:'source/health_professional'). -outcome_uri('UF', C) :- !, rdf_equal(C, aers:'source/user_facility'). -outcome_uri('CR', C) :- !, rdf_equal(C, aers:'source/company_representative'). -outcome_uri('DT', C) :- !, rdf_equal(C, aers:'source/distributor'). -outcome_uri('OTH', C) :- !, rdf_equal(C, aers:'source/other'). +outcome_uri('OT', C) :- !, rdf_equal(C, aers:'other'). + + +source_uri('FGN', C) :- !, rdf_equal(C, aers:'source/foreign'). +source_uri('SDY', C) :- !, rdf_equal(C, aers:'source/study'). +source_uri('LIT', C) :- !, rdf_equal(C, aers:'source/literature'). +source_uri('CSM', C) :- !, rdf_equal(C, aers:'source/consumer'). +source_uri('HP', C) :- !, rdf_equal(C, aers:'source/health_professional'). +source_uri('UF', C) :- !, rdf_equal(C, aers:'source/user_facility'). +source_uri('CR', C) :- !, rdf_equal(C, aers:'source/company_representative'). +source_uri('DT', C) :- !, rdf_equal(C, aers:'source/distributor'). +source_uri('OTH', C) :- !, rdf_equal(C, aers:'other'). + + +literal_n_to_id(Lit, Prefix, URI) :- + ( Lit = literal(N), + number(N) + -> atom_number(A, N) + ; A = Lit + ), + literal_to_id(A, Prefix, URI).