aers_rewrite/commit

cleanup of ascii rewrite

author CWI eculture 2
Wed Jan 30 16:08:03 2013 +0100
committer CWI eculture 2
Wed Jan 30 16:08:03 2013 +0100
commit ef562928093a645c5f36e0115e6d1a739a0f2ee9
tree 77c4922464ba586b70bb5b936d3e74e9a7c96c60
parent 4df0408dcc1dfeb9608b16c4338c928a1b3edf3d
Diff style: patch stat
diff --git a/lib/ascii_rewrite.pl b/lib/ascii_rewrite.pl
index fab498b..4b75115 100644
--- a/lib/ascii_rewrite.pl
+++ b/lib/ascii_rewrite.pl
@@ -1,5 +1,6 @@
 :- module(ascii_rewrite,
-	  [convert_ascii/0
+	  [convert_ascii/0,
+	   convert_quarter/2
 	  ]).
 
 :- use_module(library(csvrdf)).
@@ -13,33 +14,59 @@
 	table_map/3.
 
 user:file_search_path(ascii, '../fda_data/ascii').
+user:file_search_path(ascii_rdf, '../rdf/aers_ascii').
 
-filename('Report', 'DEMO04Q1.TXT').
-filename('Drug', 'DRUG04Q1.TXT').
-filename('Reaction', 'REAC04Q1.TXT').
-filename('Outcome', 'OUTC04Q1.TXT').
-filename('Source', 'RPSR04Q1.TXT').
-filename('Therapy', 'THER04Q1.TXT').
-filename('Indication', 'INDI04Q1.TXT').
+filename('Report', 'DEMO').	% filename(Class, FilePrefix): ascii_to_rdf/2 appends the quarter and '.TXT'
+filename('Drug', 'DRUG').
+filename('Reaction', 'REAC').
+filename('Outcome', 'OUTC').
+filename('Source', 'RPSR').
+filename('Therapy', 'THER').
+filename('Indication', 'INDI').
 
 :- debug(csvrdf).
 
+
+quarter('04Q1').	% every quarter/1 fact is enumerated by convert_ascii/0
+quarter('04Q2').
+quarter('04Q3').
+quarter('04Q4').
+
 convert_ascii :-
+	(   quarter(Q),	% failure-driven loop over all quarter/1 facts
+	    atom_concat(aers, Q, Graph),	% graph name, e.g. aers04Q1
+	    atom_concat(Graph, '.ttl', RDF_FileName),
+	    absolute_file_name(ascii_rdf(RDF_FileName), RDF_File),	% resolved via the ascii_rdf search path
+	    convert_quarter(Q, Graph),
+	    rdf_save_turtle(RDF_File, [graph(Graph)]),
+	    rdf_retractall(_,_,_,Graph),	% drop this graph's triples once they are saved
+	    fail	% force backtracking into the next quarter
+	;   true	% always succeed after the last quarter
+	).
+
+convert_quarter(Quarter, Graph) :-	% load all tables of Quarter into Graph, then rewrite Graph
+	ascii_to_rdf(Quarter, Graph),
+	rewrite_graph(Graph).
+
+ascii_to_rdf(Quarter, Graph) :-	% load every filename/2 table of Quarter into Graph
 	rdf_current_ns(aers, Prefix),
-	(   filename(Name, FileName),
+	FileExt = '.TXT',
+	(   filename(Class, FilePrefix),	% failure-driven loop over the filename/2 facts
+	    atomic_list_concat([FilePrefix,Quarter,FileExt], FileName),	% e.g. 'DEMO04Q1.TXT'; replaces deprecated concat_atom/2
 	    absolute_file_name(ascii(FileName), File),
+	    debug(csvrdf, 'convert ~w to ~w', [FileName, Graph]),
 	    load_csv_as_rdf(File, [prefix(Prefix),
-				   class(Name),
-				   graph(Name),
+				   class(Class),
+				   graph(Graph),
 				   separator(0'$),
 				   match_arity(false)
 				   ]),
-	    rewrite_graph(Name),
 	    fail
 	;   true
 	).
 
 rewrite_graph(Graph) :-
+	debug(csvrdf, 'rewrite ~w', [Graph]),
 	rdf_rewrite(Graph).
 
 
@@ -47,42 +74,130 @@ rewrite_graph(Graph) :-
 % report
 %
 
-assign_drug_uris @@
+assign_report_uris @@
 {S, rdf:type, aers:'Report'},
 {S, aers:isr, Id}\
 {S}
 <=>
-literal_to_id(Id, aers_r, URI),
+literal_n_to_id(Id, aers_r, URI),
 {URI}.
 
+report_image @@
+{_, aers:image, _}
+<=>
+true.
+
+report_followup @@
+{S, aers:i_f_cod, literal(C)}
+<=>
+report_followup_uri(C, URI),
+{S, aers:followup_status, URI}.
+
+report_code @@
+{S, aers:rept_cod, literal(C)}
+<=>
+report_type_uri(C, URI),
+{S, aers:report_type, URI}.
+
+report_occp_code @@
+{S, aers:occp_cod, literal(C)}
+<=>
+report_occ_uri(C, URI),
+{S, aers:reporter_type, URI}.
+
+patient @@
+{S, rdf:type, aers:'Report'}\
+true
+<=>
+rdf_bnode(P),
+{S, aers:patient, P},
+{P, rdf:type, aers:'Patient'}.
+
+patient_gender @@
+{S, aers:patient, P}\
+{S, aers:gndr_cod, literal(GND)}
+<=>
+patient_gender_uri(GND, Gender),
+{P, aers:gender, Gender}.
+
+patient_age @@
+{S, aers:patient, P}\
+{S, aers:age, A},
+{S, aers:age_cod, literal(AC)}?
+<=>
+patient_age_uri(AC, A_URI)?,
+{P, aers:age, A},
+{P, aers:age_type, A_URI}.
+
+patient_weight @@
+{S, aers:patient, P}\
+{S, aers:wt, W},
+{S, aers:wt_cod, literal(WC)}?
+<=>
+patient_weight_uri(WC, W_URI)?,
+{P, aers:weight, W},
+{P, aers:weight_type, W_URI}.
+
+patient_death @@
+{S, aers:patient, P}\
+{S, aers:death_dt, D}
+<=>
+{P, aers:death_dt, D}.
+
 
 % drug
 %
 % URIs of the drugs on the DRUG_SEQ
 
-assign_report_uris @@
+assign_drug_uris @@
+{S, rdf:type, aers:'Drug'},
 {S, aers:drug_seq, Id}\
 {S}
 <=>
-literal_to_id(Id, aers_d, URI),
+literal_n_to_id(Id, aers_d, URI),
 {URI}.
 
 link_drug_report @@
 {S, rdf:type, aers:'Drug'}\
-{S, aers:isr, literal(ISR)}
+{S, aers:isr, ISR}
 <=>
-literal_to_id(ISR, aers_r, R),
+literal_n_to_id(ISR, aers_r, R),
 {R, aers:drug, S}.
 
+drug_role @@
+{S, rdf:type, aers:'Drug'}\
+{S, aers:role_cod, literal(Code)}
+<=>
+drug_role_uri(Code, URI),
+{S, aers:role, URI}.
+
+drug_verbatim @@
+{S, aers:val_vbm, literal(Code)}
+<=>
+drug_name_type_uri(Code, URI),
+{S, aers:name_type, URI}.
+
+drug_dechal @@
+{S, aers:dechal, literal(Code)}
+<=>
+drug_chal_uri(Code, URI),
+{S, aers:dechal, URI}.
+
+drug_rechal @@	% map the aers:rechal code literal to a URI, as drug_dechal does for aers:dechal
+{S, aers:rechal, literal(Code)}
+<=>
+drug_chal_uri(Code, URI),
+{S, aers:rechal, URI}.
+
 
 % reaction
 
 link_reaction_report @@
 {S, rdf:type, aers:'Reaction'},
-{S, aers:isr, literal(ISR)},
+{S, aers:isr, ISR},
 {S, aers:pt, Term}
 <=>
-literal_to_id(ISR, aers_r, R),
+literal_n_to_id(ISR, aers_r, R),
 {R, aers:reaction, Term}.
 
 
@@ -90,10 +205,10 @@ literal_to_id(ISR, aers_r, R),
 
 link_outcome_report @@
 {S, rdf:type, aers:'Outcome'},
-{S, aers:isr, literal(ISR)},
+{S, aers:isr, ISR},
 {S, aers:outc_cod, literal(Code)}
 <=>
-literal_to_id(ISR, aers_r, R),
+literal_n_to_id(ISR, aers_r, R),
 outcome_uri(Code, URI),
 {R, aers:outcome, URI}.
 
@@ -102,10 +217,10 @@ outcome_uri(Code, URI),
 
 link_source_report @@
 {S, rdf:type, aers:'Source'},
-{S, aers:isr, literal(ISR)},
+{S, aers:isr, ISR},
 {S, aers:rpsr_cod, literal(Code)}
 <=>
-literal_to_id(ISR, aers_r, R),
+literal_n_to_id(ISR, aers_r, R),
 source_uri(Code, URI),
 {R, aers:source, URI}.
 
@@ -115,10 +230,9 @@ source_uri(Code, URI),
 link_therapy_drug @@
 {S, rdf:type, aers:'Therapy'}\
 {S, aers:isr, _},
-{S, aers:drug_seq, literal(DRUG_SEQ)}
+{S, aers:drug_seq, DRUG_SEQ}
 <=>
-literal_to_id(DRUG_SEQ, aers_d, Drug),
-{Drug, aers:therapy, S}.
+literal_n_to_id(DRUG_SEQ, aers_d, S).	% NOTE(review): S is also the subject matched above and no triple is emitted any more - confirm dropping the Drug-to-Therapy link is intended
 
 
 % Indications
@@ -126,13 +240,57 @@ literal_to_id(DRUG_SEQ, aers_d, Drug),
 link_indication_drug @@
 {S, rdf:type, aers:'Indication'},
 {S, aers:isr, _},
-{S, aers:drug_seq, literal(DRUG_SEQ)},
+{S, aers:drug_seq, DRUG_SEQ},
 {S, aers:indi_pt, Term}
 <=>
-literal_to_id(DRUG_SEQ, aers_d, Drug),
+literal_n_to_id(DRUG_SEQ, aers_d, Drug),
 {Drug, aers:indication, Term}.
 
 
+report_followup_uri('I', C) :- !, rdf_equal(C, aers:'report/initial').
+report_followup_uri('F', C) :- !, rdf_equal(C, aers:'report/followup').
+
+report_type_uri('EXP', C) :- !, rdf_equal(C, aers:'report/expedited').
+report_type_uri('PER', C) :- !, rdf_equal(C, aers:'report/periodic').
+report_type_uri('DIR', C) :- !, rdf_equal(C, aers:'report/direct').
+
+report_occ_uri('MD', C) :- !, rdf_equal(C, aers:'reporter/physician').
+report_occ_uri('PH', C) :- !, rdf_equal(C, aers:'reporter/pharmacist').
+report_occ_uri('OT', C) :- !, rdf_equal(C, aers:'reporter/health_professional').
+report_occ_uri('LW', C) :- !, rdf_equal(C, aers:'reporter/lawyer').
+report_occ_uri('CN', C) :- !, rdf_equal(C, aers:'reporter/consumer').
+
+patient_gender_uri(V, V) :- var(V), !.
+patient_gender_uri('UNK', C) :- !, rdf_equal(C, aers:'unknown').
+patient_gender_uri('M', C)   :- !, rdf_equal(C, aers:'gender/male').
+patient_gender_uri('F', C)   :- !, rdf_equal(C, aers:'gender/female').
+patient_gender_uri('NS', C)  :- !, rdf_equal(C, aers:'not_specified').
+
+patient_weight_uri(V, V) :- var(V), !.
+patient_weight_uri('KG', C)  :- !, rdf_equal(C, aers:'weight/kg').
+patient_weight_uri('LBS', C) :- !, rdf_equal(C, aers:'weight/lbs').
+patient_weight_uri('GMS', C) :- !, rdf_equal(C, aers:'weight/gms').
+
+patient_age_uri(V, V) :- var(V), !.	% unbound code: the result stays unbound as well
+patient_age_uri('DEC', C) :- !, rdf_equal(C, aers:'duration/decade').	% was 'Dec'; all other codes here are upper case
+patient_age_uri('YR', C)  :- !, rdf_equal(C, aers:'duration/year').
+patient_age_uri('MON', C) :- !, rdf_equal(C, aers:'duration/month').
+patient_age_uri('WK', C)  :- !, rdf_equal(C, aers:'duration/week').
+patient_age_uri('DY', C)  :- !, rdf_equal(C, aers:'duration/day').
+patient_age_uri('HR', C)  :- !, rdf_equal(C, aers:'duration/hour').
+
+drug_role_uri('PS', C) :- !, rdf_equal(C, aers:'drug/primary_suspect').
+drug_role_uri('SS', C) :- !, rdf_equal(C, aers:'drug/secondary_suspect').
+drug_role_uri('C', C) :-  !, rdf_equal(C, aers:'drug/concomitant').
+drug_role_uri('I', C) :-  !, rdf_equal(C, aers:'drug/interacting').
+
+drug_name_type_uri('1', C) :- !, rdf_equal(C, aers:'drug/tradename').
+drug_name_type_uri('2', C) :- !, rdf_equal(C, aers:'drug/verbatim').
+
+drug_chal_uri('Y', C) :- !, rdf_equal(C, aers:'drug/positive').
+drug_chal_uri('N', C) :- !, rdf_equal(C, aers:'drug/negative').
+drug_chal_uri('U', C) :- !, rdf_equal(C, aers:'unknown').
+drug_chal_uri('D', C) :- !, rdf_equal(C, aers:'does_not_apply').
 
 outcome_uri('DE', C) :- !, rdf_equal(C, aers:'outcome/death').
 outcome_uri('LT', C) :- !, rdf_equal(C, aers:'outcome/life_threatening').
@@ -140,15 +298,24 @@ outcome_uri('HO', C) :- !, rdf_equal(C, aers:'outcome/hospitalization').
 outcome_uri('DS', C) :- !, rdf_equal(C, aers:'outcome/disability').
 outcome_uri('CA', C) :- !, rdf_equal(C, aers:'outcome/congenital_anomaly').
 outcome_uri('RI', C) :- !, rdf_equal(C, aers:'outcome/required_intervention').
-outcome_uri('OT', C) :- !, rdf_equal(C, aers:'outcome/other').
-
-
-outcome_uri('FGN', C) :- !, rdf_equal(C, aers:'source/foreign').
-outcome_uri('SDY', C) :- !, rdf_equal(C, aers:'source/study').
-outcome_uri('LIT', C) :- !, rdf_equal(C, aers:'source/literature').
-outcome_uri('CSM', C) :- !, rdf_equal(C, aers:'source/consumer').
-outcome_uri('HP', C) :-  !, rdf_equal(C, aers:'source/health_professional').
-outcome_uri('UF', C) :-  !, rdf_equal(C, aers:'source/user_facility').
-outcome_uri('CR', C) :-  !, rdf_equal(C, aers:'source/company_representative').
-outcome_uri('DT', C) :-  !, rdf_equal(C, aers:'source/distributor').
-outcome_uri('OTH', C) :- !, rdf_equal(C, aers:'source/other').
+outcome_uri('OT', C) :- !, rdf_equal(C, aers:'other').
+
+
+source_uri('FGN', C) :- !, rdf_equal(C, aers:'source/foreign').
+source_uri('SDY', C) :- !, rdf_equal(C, aers:'source/study').
+source_uri('LIT', C) :- !, rdf_equal(C, aers:'source/literature').
+source_uri('CSM', C) :- !, rdf_equal(C, aers:'source/consumer').
+source_uri('HP', C) :-  !, rdf_equal(C, aers:'source/health_professional').
+source_uri('UF', C) :-  !, rdf_equal(C, aers:'source/user_facility').
+source_uri('CR', C) :-  !, rdf_equal(C, aers:'source/company_representative').
+source_uri('DT', C) :-  !, rdf_equal(C, aers:'source/distributor').
+source_uri('OTH', C) :- !, rdf_equal(C, aers:'other').
+
+
+literal_n_to_id(Lit, Prefix, URI) :-	% wrapper around literal_to_id/3 that also accepts literal(Number)
+	(   Lit = literal(N),
+	    number(N)
+	->  atom_number(A, N)	% numeric literal: hand its atom form to literal_to_id/3
+	;   A = Lit	% anything else is passed through unchanged
+	),
+	literal_to_id(A, Prefix, URI).