aers_rewrite/commit

FIX bug in csvrdf that caused duplicate entries for each row.

author CWI eculture 2
Thu Jan 31 09:09:28 2013 +0100
committer CWI eculture 2
Thu Jan 31 09:09:28 2013 +0100
commit 96f561dec79e0af90d38d8afb38b6fd46488d9cf
tree c4fef8ebc6440652ff3b3b42254f31b6dcec2f7f
parent ef562928093a645c5f36e0115e6d1a739a0f2ee9
Diff style: patch stat
diff --git a/lib/ascii_rewrite.pl b/lib/ascii_rewrite.pl
index 4b75115..1747180 100644
--- a/lib/ascii_rewrite.pl
+++ b/lib/ascii_rewrite.pl
@@ -9,6 +9,8 @@
 :- use_module(library(xmlrdf/rdf_rewrite)).
 :- use_module(library(xmlrdf/rdf_convert_util)).
 
+:- rdf_meta
+	code_to_uri(+,+,r).
 
 :- dynamic
 	table_map/3.
@@ -27,10 +29,40 @@ filename('Indication', 'INDI').
 :- debug(csvrdf).
 
 
-quarter('04Q1').
+%quarter('04Q1').
 quarter('04Q2').
 quarter('04Q3').
 quarter('04Q4').
+quarter('05Q1').
+quarter('05Q2').
+quarter('05Q3').
+quarter('05Q4').
+quarter('06Q1').
+quarter('06Q2').
+quarter('06Q3').
+quarter('06Q4').
+quarter('07Q1').
+quarter('07Q2').
+quarter('07Q3').
+quarter('07Q4').
+quarter('08Q1').
+quarter('08Q2').
+quarter('08Q3').
+quarter('08Q4').
+quarter('09Q1').
+quarter('09Q2').
+quarter('09Q3').
+quarter('09Q4').
+quarter('10Q1').
+quarter('10Q2').
+quarter('10Q3').
+quarter('10Q4').
+quarter('11Q1').
+quarter('11Q2').
+quarter('11Q3').
+quarter('11Q4').
+quarter('12Q1').
+quarter('12Q2').
 
 convert_ascii :-
 	(   quarter(Q),
@@ -38,7 +70,9 @@ convert_ascii :-
 	    atom_concat(Graph, '.ttl', RDF_FileName),
 	    absolute_file_name(ascii_rdf(RDF_FileName), RDF_File),
 	    convert_quarter(Q, Graph),
+	    debug(csvrdf, 'save ~w to ~w', [Graph, RDF_File]),
 	    rdf_save_turtle(RDF_File, [graph(Graph)]),
+	    debug(csvrdf, 'remove ~w', [Graph]),
 	    rdf_retractall(_,_,_,Graph),
 	    fail
 	;   true
@@ -72,250 +106,217 @@ rewrite_graph(Graph) :-
 
 
 % report
-%
 
-assign_report_uris @@
+report_uri @@
 {S, rdf:type, aers:'Report'},
-{S, aers:isr, Id}\
+{S, aers:isr, literal(Id)}\
 {S}
 <=>
-literal_n_to_id(Id, aers_r, URI),
+id_to_uri(Id, aers_r, URI),
 {URI}.
 
-report_image @@
-{_, aers:image, _}
+report_properties @@
+{S, rdf:type, aers:'Report'}\
+{S, aers:image, _}?,
+{S, aers:i_f_cod, literal(Follow)}?,
+{S, aers:rept_cod, literal(Type)}?,
+{S, aers:occp_cod, literal(Reporter)}?
 <=>
-true.
+code_to_uri(followup, Follow, Follow_URI),
+code_to_uri(type, Type, Type_URI),
+code_to_uri(reporter, Reporter, Reporter_URI),
+{S, aers:followup_status, Follow_URI},
+{S, aers:report_type, Type_URI},
+{S, aers:reporter_type, Reporter_URI}.
 
-report_followup @@
-{S, aers:i_f_cod, literal(C)}
-<=>
-report_followup_uri(C, URI),
-{S, aers:followup_status, URI}.
 
-report_code @@
-{S, aers:rept_cod, literal(C)}
-<=>
-report_type_uri(C, URI),
-{S, aers:report_type, URI}.
-
-report_occp_code @@
-{S, aers:occp_cod, literal(C)}
-<=>
-report_occ_uri(C, URI),
-{S, aers:reporter_type, URI}.
+% patient
 
 patient @@
 {S, rdf:type, aers:'Report'}\
-true
-<=>
-rdf_bnode(P),
-{S, aers:patient, P},
-{P, rdf:type, aers:'Patient'}.
-
-patient_gender @@
-{S, aers:patient, P}\
-{S, aers:gndr_cod, literal(GND)}
-<=>
-patient_gender_uri(GND, Gender),
-{P, aers:gender, Gender}.
-
-patient_age @@
-{S, aers:patient, P}\
-{S, aers:age, A},
-{S, aers:age_cod, literal(AC)}?
-<=>
-patient_age_uri(AC, A_URI)?,
-{P, aers:age, A},
-{P, aers:age_type, A_URI}.
-
-patient_weight @@
-{S, aers:patient, P}\
-{S, aers:wt, W},
-{S, aers:wt_cod, literal(WC)}?
-<=>
-patient_weight_uri(WC, W_URI)?,
-{P, aers:weight, W},
-{P, aers:weight_type, W_URI}.
-
-patient_death @@
-{S, aers:patient, P}\
-{S, aers:death_dt, D}
+{S, aers:gndr_cod, literal(GND)}?,
+{S, aers:age, A}?,
+{S, aers:age_cod, literal(AC)}?,
+{S, aers:wt, W}?,
+{S, aers:wt_cod, literal(WC)}?,
+{S, aers:death_dt, D}?
 <=>
-{P, aers:death_dt, D}.
+at_least_one_given([GND,A,W,D]),
+code_to_uri(gender, GND, Gender),
+code_to_uri(age, AC, A_URI),
+code_to_uri(weight, WC, W_URI),
+{S, aers:patient,
+ bnode([ aers:gender = Gender,
+	 aers:age = A,
+	 aers:age_type = A_URI,
+	 aers:weight = W,
+	 aers:weight_type = W_URI,
+	 aers:death_dt = D
+       ])
+}.
 
 
 % drug
 %
-% URIs of the drugs on the DRUG_SEQ
+% aers:drug_seq is replaced by drug_id so that we can quickly find drugs
+% later on
 
-assign_drug_uris @@
+drug_uri @@
 {S, rdf:type, aers:'Drug'},
-{S, aers:drug_seq, Id}\
+{S, aers:isr, literal(ISR)},
+{S, aers:drug_seq, literal(Drug_Seq)}\
 {S}
 <=>
-literal_n_to_id(Id, aers_d, URI),
-{URI}.
-
-link_drug_report @@
-{S, rdf:type, aers:'Drug'}\
-{S, aers:isr, ISR}
-<=>
-literal_n_to_id(ISR, aers_r, R),
-{R, aers:drug, S}.
+id_to_uri(ISR, aers_r, R),
+id_to_uri(Drug_Seq, aers_d, D),
+{D},
+{R, aers:drug, D}.
 
-drug_role @@
+drug_properties @@
 {S, rdf:type, aers:'Drug'}\
-{S, aers:role_cod, literal(Code)}
-<=>
-drug_role_uri(Code, URI),
-{S, aers:role, URI}.
-
-drug_verbatim @@
-{S, aers:val_vbm, literal(Code)}
-<=>
-drug_name_type_uri(Code, URI),
-{S, aers:name_type, URI}.
-
-drug_dechal @@
-{S, aers:dechal, literal(Code)}
-<=>
-drug_chal_uri(Code, URI),
-{S, aers:dechal, URI}.
-
-drug_rechal @@
-{S, aers:dechal, literal(Code)}
+{S, aers:isr, _},
+{S, aers:role_cod, literal(Role)}?,
+{S, aers:val_vbm, literal(Type)}?,
+{S, aers:dechal, literal(DChal)}?,
+{S, aers:rechal, literal(RChal)}?
 <=>
-drug_chal_uri(Code, URI),
-{S, aers:dechal, URI}.
+code_to_uri(role, Role, Role_URI),
+code_to_uri(name_type, Type, Type_URI),
+code_to_uri(chal, DChal, DChal_URI),
+code_to_uri(chal, RChal, RChal_URI),
+{S, aers:role, Role_URI},
+{S, aers:name_type, Type_URI},
+{S, aers:dechal, DChal_URI},
+{S, aers:rechal, RChal_URI}.
 
 
 % reaction
 
-link_reaction_report @@
+reaction @@
 {S, rdf:type, aers:'Reaction'},
-{S, aers:isr, ISR},
+{S, aers:isr, literal(ISR)},
 {S, aers:pt, Term}
 <=>
-literal_n_to_id(ISR, aers_r, R),
+id_to_uri(ISR, aers_r, R),
 {R, aers:reaction, Term}.
 
 
 % outcome
 
-link_outcome_report @@
+outcome @@
 {S, rdf:type, aers:'Outcome'},
-{S, aers:isr, ISR},
+{S, aers:isr, literal(ISR)},
 {S, aers:outc_cod, literal(Code)}
 <=>
-literal_n_to_id(ISR, aers_r, R),
-outcome_uri(Code, URI),
+id_to_uri(ISR, aers_r, R),
+code_to_uri(outcome, Code, URI),
 {R, aers:outcome, URI}.
 
 
 % source
 
-link_source_report @@
+source @@
 {S, rdf:type, aers:'Source'},
-{S, aers:isr, ISR},
+{S, aers:isr, literal(ISR)},
 {S, aers:rpsr_cod, literal(Code)}
 <=>
-literal_n_to_id(ISR, aers_r, R),
-source_uri(Code, URI),
+id_to_uri(ISR, aers_r, R),
+code_to_uri(source, Code, URI),
 {R, aers:source, URI}.
 
 
 % Therapy
 
-link_therapy_drug @@
-{S, rdf:type, aers:'Therapy'}\
-{S, aers:isr, _},
-{S, aers:drug_seq, DRUG_SEQ}
+therapy @@
+{S, rdf:type, aers:'Therapy'},
+{S, aers:drug_seq, literal(DrugSeq)},
+{S, aers:isr, _}
 <=>
-literal_n_to_id(DRUG_SEQ, aers_d, S).
+id_to_uri(DrugSeq, aers_d, D),
+{D, aers:therapy, S}.
 
 
 % Indications
 
-link_indication_drug @@
+indication @@
 {S, rdf:type, aers:'Indication'},
+{S, aers:drug_seq, literal(DrugSeq)},
 {S, aers:isr, _},
-{S, aers:drug_seq, DRUG_SEQ},
 {S, aers:indi_pt, Term}
 <=>
-literal_n_to_id(DRUG_SEQ, aers_d, Drug),
-{Drug, aers:indication, Term}.
-
-
-report_followup_uri('I', C) :- !, rdf_equal(C, aers:'report/initial').
-report_followup_uri('F', C) :- !, rdf_equal(C, aers:'report/followup').
-
-report_type_uri('EXP', C) :- !, rdf_equal(C, aers:'report/expedited').
-report_type_uri('PER', C) :- !, rdf_equal(C, aers:'report/periodic').
-report_type_uri('DIR', C) :- !, rdf_equal(C, aers:'report/direct').
-
-report_occ_uri('MD', C) :- !, rdf_equal(C, aers:'reporter/physician').
-report_occ_uri('PH', C) :- !, rdf_equal(C, aers:'reporter/pharmacist').
-report_occ_uri('OT', C) :- !, rdf_equal(C, aers:'reporter/health_professional').
-report_occ_uri('LW', C) :- !, rdf_equal(C, aers:'reporter/lawyer').
-report_occ_uri('CN', C) :- !, rdf_equal(C, aers:'reporter/consumer').
-
-patient_gender_uri(V, V) :- var(V), !.
-patient_gender_uri('UNK', C) :- !, rdf_equal(C, aers:'unknown').
-patient_gender_uri('M', C)   :- !, rdf_equal(C, aers:'gender/male').
-patient_gender_uri('F', C)   :- !, rdf_equal(C, aers:'gender/female').
-patient_gender_uri('NS', C)  :- !, rdf_equal(C, aers:'not_specified').
-
-patient_weight_uri(V, V) :- var(V), !.
-patient_weight_uri('KG', C)  :- !, rdf_equal(C, aers:'weight/kg').
-patient_weight_uri('LBS', C) :- !, rdf_equal(C, aers:'weight/lbs').
-patient_weight_uri('GMS', C) :- !, rdf_equal(C, aers:'weight/gms').
-
-patient_age_uri(V, V) :- var(V), !.
-patient_age_uri('Dec', C) :- !, rdf_equal(C, aers:'duration/decade').
-patient_age_uri('YR', C)  :- !, rdf_equal(C, aers:'duration/year').
-patient_age_uri('MON', C) :- !, rdf_equal(C, aers:'duration/month').
-patient_age_uri('WK', C)  :- !, rdf_equal(C, aers:'duration/week').
-patient_age_uri('DY', C)  :- !, rdf_equal(C, aers:'duration/day').
-patient_age_uri('HR', C)  :- !, rdf_equal(C, aers:'duration/hour').
-
-drug_role_uri('PS', C) :- !, rdf_equal(C, aers:'drug/primary_suspect').
-drug_role_uri('SS', C) :- !, rdf_equal(C, aers:'drug/secondary_suspect').
-drug_role_uri('C', C) :-  !, rdf_equal(C, aers:'drug/concomitant').
-drug_role_uri('I', C) :-  !, rdf_equal(C, aers:'drug/interacting').
-
-drug_name_type_uri('1', C) :- !, rdf_equal(C, aers:'drug/tradename').
-drug_name_type_uri('2', C) :- !, rdf_equal(C, aers:'drug/verbatim').
-
-drug_chal_uri('Y', C) :- !, rdf_equal(C, aers:'drug/positive').
-drug_chal_uri('N', C) :- !, rdf_equal(C, aers:'drug/negative').
-drug_chal_uri('U', C) :- !, rdf_equal(C, aers:'unknown').
-drug_chal_uri('D', C) :- !, rdf_equal(C, aers:'does_not_apply').
-
-outcome_uri('DE', C) :- !, rdf_equal(C, aers:'outcome/death').
-outcome_uri('LT', C) :- !, rdf_equal(C, aers:'outcome/life_threatening').
-outcome_uri('HO', C) :- !, rdf_equal(C, aers:'outcome/hospitalization').
-outcome_uri('DS', C) :- !, rdf_equal(C, aers:'outcome/disability').
-outcome_uri('CA', C) :- !, rdf_equal(C, aers:'outcome/congenital_anomaly').
-outcome_uri('RI', C) :- !, rdf_equal(C, aers:'outcome/required_intervention').
-outcome_uri('OT', C) :- !, rdf_equal(C, aers:'other').
-
-
-source_uri('FGN', C) :- !, rdf_equal(C, aers:'source/foreign').
-source_uri('SDY', C) :- !, rdf_equal(C, aers:'source/study').
-source_uri('LIT', C) :- !, rdf_equal(C, aers:'source/literature').
-source_uri('CSM', C) :- !, rdf_equal(C, aers:'source/consumer').
-source_uri('HP', C) :-  !, rdf_equal(C, aers:'source/health_professional').
-source_uri('UF', C) :-  !, rdf_equal(C, aers:'source/user_facility').
-source_uri('CR', C) :-  !, rdf_equal(C, aers:'source/company_representative').
-source_uri('DT', C) :-  !, rdf_equal(C, aers:'source/distributor').
-source_uri('OTH', C) :- !, rdf_equal(C, aers:'other').
-
-
-literal_n_to_id(Lit, Prefix, URI) :-
-	(   Lit = literal(N),
-	    number(N)
-	->  atom_number(A, N)
-	;   A = Lit
-	),
-	literal_to_id(A, Prefix, URI).
+id_to_uri(DrugSeq, aers_d, D),
+{D, aers:indication, Term}.
+
+
+code_to_uri(_, V, V) :- var(V), !.
+
+code_to_uri(followup, 'I', aers:'report/initial') :- !.
+code_to_uri(followup, 'F', aers:'report/followup'):- !.
+
+code_to_uri(type, 'EXP', aers:'report/expedited') :- !.
+code_to_uri(type, 'PER', aers:'report/periodic') :- !.
+code_to_uri(type, 'DIR', aers:'report/direct') :- !.
+
+code_to_uri(reporter, 'MD', aers:'reporter/physician') :- !.
+code_to_uri(reporter, 'PH', aers:'reporter/pharmacist') :- !.
+code_to_uri(reporter, 'OT', aers:'reporter/health_professional') :- !.
+code_to_uri(reporter, 'LW', aers:'reporter/lawyer') :- !.
+code_to_uri(reporter, 'CN', aers:'reporter/consumer') :- !.
+
+code_to_uri(gender, 'UNK', aers:'unknown') :- !.
+code_to_uri(gender, 'M',   aers:'gender/male') :- !.
+code_to_uri(gender, 'F',   aers:'gender/female') :- !.
+code_to_uri(gender, 'NS',  aers:'not_specified') :- !.
+
+code_to_uri(weight, 'KG',  aers:'weight/kg') :- !.
+code_to_uri(weight, 'LBS', aers:'weight/lbs') :- !.
+code_to_uri(weight, 'GMS', aers:'weight/gms') :- !.
+
+code_to_uri(age, 'Dec',	aers:'duration/decade') :- !.
+code_to_uri(age, 'YR',  aers:'duration/year') :- !.
+code_to_uri(age, 'MON', aers:'duration/month') :- !.
+code_to_uri(age, 'WK',  aers:'duration/week') :- !.
+code_to_uri(age, 'DY',  aers:'duration/day') :- !.
+code_to_uri(age, 'HR',  aers:'duration/hour') :- !.
+
+code_to_uri(role, 'PS', aers:'drug/primary_suspect') :- !.
+code_to_uri(role, 'SS',	aers:'drug/secondary_suspect') :- !.
+code_to_uri(role, 'C',  aers:'drug/concomitant') :- !.
+code_to_uri(role, 'I',  aers:'drug/interacting') :- !.
+
+code_to_uri(name_type, '1', aers:'drug/tradename') :- !.
+code_to_uri(name_type, '2', aers:'drug/verbatim') :- !.
+
+code_to_uri(chal, 'Y', aers:'drug/positive') :- !.
+code_to_uri(chal, 'N', aers:'drug/negative') :- !.
+code_to_uri(chal, 'U', aers:'drug/unknown') :- !.
+code_to_uri(chal, 'D', aers:'drug/does_not_apply') :- !.
+
+code_to_uri(outcome, 'DE', aers:'outcome/death') :- !.
+code_to_uri(outcome, 'LT', aers:'outcome/life_threatening') :- !.
+code_to_uri(outcome, 'HO', aers:'outcome/hospitalization') :- !.
+code_to_uri(outcome, 'DS', aers:'outcome/disability') :- !.
+code_to_uri(outcome, 'CA', aers:'outcome/congenital_anomaly') :- !.
+code_to_uri(outcome, 'RI', aers:'outcome/required_intervention') :- !.
+code_to_uri(outcome, 'OT', aers:'outcome/other') :- !.
+
+code_to_uri(source, 'FGN', aers:'source/foreign') :- !.
+code_to_uri(source, 'SDY', aers:'source/study') :- !.
+code_to_uri(source, 'LIT', aers:'source/literature') :- !.
+code_to_uri(source, 'CSM', aers:'source/consumer') :- !.
+code_to_uri(source, 'HP',  aers:'source/health_professional') :- !.
+code_to_uri(source, 'UF',  aers:'source/user_facility') :- !.
+code_to_uri(source, 'CR',  aers:'source/company_representative') :- !.
+code_to_uri(source, 'DT',  aers:'source/distributor') :- !.
+code_to_uri(source, 'OTH', aers:'other') :- !.
+
+code_to_uri(_, Code, literal(Code)).
+
+id_to_uri(Id, NS, URI) :-
+	rdf_current_ns(NS, Prefix),
+	atom_concat(Prefix, Id, URI).
+
+at_least_one_given(Values) :-
+	member(V, Values),
+	ground(V),
+	!.
diff --git a/lib/csvrdf.pl b/lib/csvrdf.pl
index 676e684..738176d 100644
--- a/lib/csvrdf.pl
+++ b/lib/csvrdf.pl
@@ -54,11 +54,18 @@ write_props([V|Vs], N, S, Table, Graph) :-
 	(   V = ''
 	->  true
 	;   table_map(Table, N1, P)
-        ->  rdf_assert(S, P, literal(V), Graph)
+        ->  value_assert(S, P, V, Graph)
 	;   true
 	),
 	write_props(Vs, N1, S, Table, Graph).
 
+value_assert(S, P, V, Graph) :-
+	(   number(V)
+	->  atom_number(A, V)
+	;   A = V
+	),
+	rdf_assert(S, P, literal(V), Graph).
+
 %%	create_table_map(+Header, +TableMapId)
 %
 %
@@ -83,6 +90,7 @@ create_table_map_([C|Cs], N, Prefix, Table) :-
 
 class_name(Prefix, Class, Options) :-
 	option(class(ClassName), Options),
+	!,
 	(   var(Prefix)
 	->  Class = ClassName
 	;   atom_concat(Prefix, ClassName, Class)