aers_rewrite/commit

AERS conversion from ASCII files

authorMichiel Hildebrand
Wed Jan 30 11:39:09 2013 +0100
committerMichiel Hildebrand
Wed Jan 30 11:39:09 2013 +0100
commit3c0187cfc22c266b1afc0595b0ec407c3dd24871
tree8b3856fe7b9cf7e2d28fd27e8e9a5b998b116df4
parent82c29079b350e6b9c5b33d5cb3259fd20a15c9cc
Diff style: patch stat
diff --git a/lib/ascii_rewrite.pl b/lib/ascii_rewrite.pl
new file mode 100644
index 0000000..fab498b
--- /dev/null
+++ b/lib/ascii_rewrite.pl
@@ -0,0 +1,154 @@
+:- module(ascii_rewrite,
+	  [convert_ascii/0
+	  ]).
+
+:- use_module(library(csvrdf)).
+:- use_module(library(semweb/rdf_db)).
+:- use_module(library(semweb/rdf_turtle_write)).
+:- use_module(library(xmlrdf/rdf_rewrite)).
+:- use_module(library(xmlrdf/rdf_convert_util)).
+
+
+:- dynamic
+	table_map/3.
+
+user:file_search_path(ascii, '../fda_data/ascii').
+
+filename('Report', 'DEMO04Q1.TXT').
+filename('Drug', 'DRUG04Q1.TXT').
+filename('Reaction', 'REAC04Q1.TXT').
+filename('Outcome', 'OUTC04Q1.TXT').
+filename('Source', 'RPSR04Q1.TXT').
+filename('Therapy', 'THER04Q1.TXT').
+filename('Indication', 'INDI04Q1.TXT').
+
+:- debug(csvrdf).
+
+convert_ascii :-
+	rdf_current_ns(aers, Prefix),
+	(   filename(Name, FileName),
+	    absolute_file_name(ascii(FileName), File),
+	    load_csv_as_rdf(File, [prefix(Prefix),
+				   class(Name),
+				   graph(Name),
+				   separator(0'$),
+				   match_arity(false)
+				   ]),
+	    rewrite_graph(Name),
+	    fail
+	;   true
+	).
+
+rewrite_graph(Graph) :-
+	rdf_rewrite(Graph).
+
+
+
+% report
+%
+
+assign_drug_uris @@
+{S, rdf:type, aers:'Report'},
+{S, aers:isr, Id}\
+{S}
+<=>
+literal_to_id(Id, aers_r, URI),
+{URI}.
+
+
+% drug
+%
+% URIs of the drugs on the DRUG_SEQ
+
+assign_report_uris @@
+{S, aers:drug_seq, Id}\
+{S}
+<=>
+literal_to_id(Id, aers_d, URI),
+{URI}.
+
+link_drug_report @@
+{S, rdf:type, aers:'Drug'}\
+{S, aers:isr, literal(ISR)}
+<=>
+literal_to_id(ISR, aers_r, R),
+{R, aers:drug, S}.
+
+
+% reaction
+
+link_reaction_report @@
+{S, rdf:type, aers:'Reaction'},
+{S, aers:isr, literal(ISR)},
+{S, aers:pt, Term}
+<=>
+literal_to_id(ISR, aers_r, R),
+{R, aers:reaction, Term}.
+
+
+% outcome
+
+link_outcome_report @@
+{S, rdf:type, aers:'Outcome'},
+{S, aers:isr, literal(ISR)},
+{S, aers:outc_cod, literal(Code)}
+<=>
+literal_to_id(ISR, aers_r, R),
+outcome_uri(Code, URI),
+{R, aers:outcome, URI}.
+
+
+% source
+
+link_source_report @@
+{S, rdf:type, aers:'Source'},
+{S, aers:isr, literal(ISR)},
+{S, aers:rpsr_cod, literal(Code)}
+<=>
+literal_to_id(ISR, aers_r, R),
+source_uri(Code, URI),
+{R, aers:source, URI}.
+
+
+% Therapy
+
+link_therapy_drug @@
+{S, rdf:type, aers:'Therapy'}\
+{S, aers:isr, _},
+{S, aers:drug_seq, literal(DRUG_SEQ)}
+<=>
+literal_to_id(DRUG_SEQ, aers_d, Drug),
+{Drug, aers:therapy, S}.
+
+
+% Indications
+
+link_indication_drug @@
+{S, rdf:type, aers:'Indication'},
+{S, aers:isr, _},
+{S, aers:drug_seq, literal(DRUG_SEQ)},
+{S, aers:indi_pt, Term}
+<=>
+literal_to_id(DRUG_SEQ, aers_d, Drug),
+{Drug, aers:indication, Term}.
+
+
+
+outcome_uri('DE', C) :- !, rdf_equal(C, aers:'outcome/death').
+outcome_uri('LT', C) :- !, rdf_equal(C, aers:'outcome/life_threatening').
+outcome_uri('HO', C) :- !, rdf_equal(C, aers:'outcome/hospitalization').
+outcome_uri('DS', C) :- !, rdf_equal(C, aers:'outcome/disability').
+outcome_uri('CA', C) :- !, rdf_equal(C, aers:'outcome/congenital_anomaly').
+outcome_uri('RI', C) :- !, rdf_equal(C, aers:'outcome/required_intervention').
+outcome_uri('OT', C) :- !, rdf_equal(C, aers:'outcome/other').
+
+
+outcome_uri('FGN', C) :- !, rdf_equal(C, aers:'source/foreign').
+outcome_uri('SDY', C) :- !, rdf_equal(C, aers:'source/study').
+outcome_uri('LIT', C) :- !, rdf_equal(C, aers:'source/literature').
+outcome_uri('CSM', C) :- !, rdf_equal(C, aers:'source/consumer').
+outcome_uri('HP', C) :-  !, rdf_equal(C, aers:'source/health_professional').
+outcome_uri('UF', C) :-  !, rdf_equal(C, aers:'source/user_facility').
+outcome_uri('CR', C) :-  !, rdf_equal(C, aers:'source/company_representative').
+outcome_uri('DT', C) :-  !, rdf_equal(C, aers:'source/distributor').
+outcome_uri('OTH', C) :- !, rdf_equal(C, aers:'source/other').
diff --git a/lib/csvrdf.pl b/lib/csvrdf.pl
new file mode 100644
index 0000000..676e684
--- /dev/null
+++ b/lib/csvrdf.pl
@@ -0,0 +1,100 @@
+:- module(csvrdf,
+	  [load_csv_as_rdf/2,
+	   flush_table_map/1
+	  ]).
+
+:- use_module(library(csv)).
+:- use_module(library(semweb/rdf_db)).
+
+:- dynamic
+	table_map/3.
+
+
+%%	load_csv_as_rdf(+Input, +Options)
+%
+%
+%	    * graph(+Graph)
+%	    RDF Graph for storing the output.  Default is =data=
+%
+%	    * prefix(+Prefix)
+%	    Create a URI from the column header name by putting Prefix
+%	    in front of it. Also used for the class. If no prefix is
+%	    defined the name in the column header is used.
+%
+%	    * class(+ClassName)
+%	    ClassName given to each row
+%
+%	    * see options for csv/2
+
+load_csv_as_rdf(File, Options) :-
+	flush_table_map(File),
+	option(graph(Graph), Options, data),
+	option(prefix(Prefix), Options, _),
+	csv_read_file(File, [Header|Content], Options),
+	length(Content, Length),
+	debug(csvrdf, '~w rows', [Length]),
+	create_table_map(Header, Prefix, File),
+	class_name(Prefix, Class, Options),
+	write_rows(Content, Class, File, Graph).
+
+write_rows([], _, _, _).
+write_rows([Row|Rows], Class, Table, Graph) :-
+	Row =.. [_|Cs],
+	rdf_bnode(BNode),
+	(   nonvar(Class)
+	->  rdf_assert(BNode, rdf:type, Class, Graph)
+	;   true
+	),
+	write_props(Cs, 0, BNode, Table, Graph),
+	write_rows(Rows, Class, Table, Graph).
+
+write_props([], _, _, _, _).
+write_props([V|Vs], N, S, Table, Graph) :-
+	N1 is N+1,
+	(   V = ''
+	->  true
+	;   table_map(Table, N1, P)
+        ->  rdf_assert(S, P, literal(V), Graph)
+	;   true
+	),
+	write_props(Vs, N1, S, Table, Graph).
+
+%%	create_table_map(+Header, +TableMapId)
+%
+%
+
+create_table_map(Header, Prefix, Table) :-
+	Header =.. [_|Cols],
+	create_table_map_(Cols, 0, Prefix, Table).
+
+create_table_map_([], _, _, _).
+create_table_map_([C|Cs], N, Prefix, Table) :-
+	N1 is N + 1,
+	downcase_atom(C, PName),
+	(   var(Prefix)
+	->  P = PName
+	;   atom_concat(Prefix, PName, P)
+	),
+	assert(table_map(Table, N1, P)),
+	create_table_map_(Cs, N1, Prefix, Table).
+
+
+%%	class_name(+Prefix, -Class, Options)
+
+class_name(Prefix, Class, Options) :-
+	option(class(ClassName), Options),
+	(   var(Prefix)
+	->  Class = ClassName
+	;   atom_concat(Prefix, ClassName, Class)
+	).
+class_name(_, _Class, _).
+
+
+capitalise(X, Y) :-
+	atom_codes(X, [H0|T]),
+	code_type(H0, to_lower(H)),
+	atom_codes(Y, [H|T]).
+
+
+flush_table_map(Table) :-
+	retractall(table_map(Table, _, _)).