aers_rewrite/commit
AERS conversion from ASCII files
author | Michiel Hildebrand |
---|---|
Wed Jan 30 11:39:09 2013 +0100 | |
committer | Michiel Hildebrand |
Wed Jan 30 11:39:09 2013 +0100 | |
commit | 3c0187cfc22c266b1afc0595b0ec407c3dd24871 |
tree | 8b3856fe7b9cf7e2d28fd27e8e9a5b998b116df4 |
parent | 82c29079b350e6b9c5b33d5cb3259fd20a15c9cc |
Diff style: patch stat
diff --git a/lib/ascii_rewrite.pl b/lib/ascii_rewrite.pl new file mode 100644 index 0000000..fab498b --- /dev/null +++ b/lib/ascii_rewrite.pl @@ -0,0 +1,154 @@ +:- module(ascii_rewrite, + [convert_ascii/0 + ]). + +:- use_module(library(csvrdf)). +:- use_module(library(semweb/rdf_db)). +:- use_module(library(semweb/rdf_turtle_write)). +:- use_module(library(xmlrdf/rdf_rewrite)). +:- use_module(library(xmlrdf/rdf_convert_util)). + + +:- dynamic + table_map/3. + +user:file_search_path(ascii, '../fda_data/ascii'). + +filename('Report', 'DEMO04Q1.TXT'). +filename('Drug', 'DRUG04Q1.TXT'). +filename('Reaction', 'REAC04Q1.TXT'). +filename('Outcome', 'OUTC04Q1.TXT'). +filename('Source', 'RPSR04Q1.TXT'). +filename('Therapy', 'THER04Q1.TXT'). +filename('Indication', 'INDI04Q1.TXT'). + +:- debug(csvrdf). + +convert_ascii :- + rdf_current_ns(aers, Prefix), + ( filename(Name, FileName), + absolute_file_name(ascii(FileName), File), + load_csv_as_rdf(File, [prefix(Prefix), + class(Name), + graph(Name), + separator(0'$), + match_arity(false) + ]), + rewrite_graph(Name), + fail + ; true + ). + +rewrite_graph(Graph) :- + rdf_rewrite(Graph). + + + +% report +% + +assign_drug_uris @@ +{S, rdf:type, aers:'Report'}, +{S, aers:isr, Id}\ +{S} +<=> +literal_to_id(Id, aers_r, URI), +{URI}. + + +% drug +% +% URIs of the drugs on the DRUG_SEQ + +assign_report_uris @@ +{S, aers:drug_seq, Id}\ +{S} +<=> +literal_to_id(Id, aers_d, URI), +{URI}. + +link_drug_report @@ +{S, rdf:type, aers:'Drug'}\ +{S, aers:isr, literal(ISR)} +<=> +literal_to_id(ISR, aers_r, R), +{R, aers:drug, S}. + + +% reaction + +link_reaction_report @@ +{S, rdf:type, aers:'Reaction'}, +{S, aers:isr, literal(ISR)}, +{S, aers:pt, Term} +<=> +literal_to_id(ISR, aers_r, R), +{R, aers:reaction, Term}. + + +% outcome + +link_outcome_report @@ +{S, rdf:type, aers:'Outcome'}, +{S, aers:isr, literal(ISR)}, +{S, aers:outc_cod, literal(Code)} +<=> +literal_to_id(ISR, aers_r, R), +outcome_uri(Code, URI), +{R, aers:outcome, URI}. + + +% source + +link_source_report @@ +{S, rdf:type, aers:'Source'}, +{S, aers:isr, literal(ISR)}, +{S, aers:rpsr_cod, literal(Code)} +<=> +literal_to_id(ISR, aers_r, R), +source_uri(Code, URI), +{R, aers:source, URI}. + + +% Therapy + +link_therapy_drug @@ +{S, rdf:type, aers:'Therapy'}\ +{S, aers:isr, _}, +{S, aers:drug_seq, literal(DRUG_SEQ)} +<=> +literal_to_id(DRUG_SEQ, aers_d, Drug), +{Drug, aers:therapy, S}. + + +% Indications + +link_indication_drug @@ +{S, rdf:type, aers:'Indication'}, +{S, aers:isr, _}, +{S, aers:drug_seq, literal(DRUG_SEQ)}, +{S, aers:indi_pt, Term} +<=> +literal_to_id(DRUG_SEQ, aers_d, Drug), +{Drug, aers:indication, Term}. + + + +outcome_uri('DE', C) :- !, rdf_equal(C, aers:'outcome/death'). +outcome_uri('LT', C) :- !, rdf_equal(C, aers:'outcome/life_threatening'). +outcome_uri('HO', C) :- !, rdf_equal(C, aers:'outcome/hospitalization'). +outcome_uri('DS', C) :- !, rdf_equal(C, aers:'outcome/disability'). +outcome_uri('CA', C) :- !, rdf_equal(C, aers:'outcome/congenital_anomaly'). +outcome_uri('RI', C) :- !, rdf_equal(C, aers:'outcome/required_intervention'). +outcome_uri('OT', C) :- !, rdf_equal(C, aers:'outcome/other'). + + +outcome_uri('FGN', C) :- !, rdf_equal(C, aers:'source/foreign'). +outcome_uri('SDY', C) :- !, rdf_equal(C, aers:'source/study'). +outcome_uri('LIT', C) :- !, rdf_equal(C, aers:'source/literature'). +outcome_uri('CSM', C) :- !, rdf_equal(C, aers:'source/consumer'). +outcome_uri('HP', C) :- !, rdf_equal(C, aers:'source/health_professional'). +outcome_uri('UF', C) :- !, rdf_equal(C, aers:'source/user_facility'). +outcome_uri('CR', C) :- !, rdf_equal(C, aers:'source/company_representative'). +outcome_uri('DT', C) :- !, rdf_equal(C, aers:'source/distributor'). +outcome_uri('OTH', C) :- !, rdf_equal(C, aers:'source/other'). diff --git a/lib/csvrdf.pl b/lib/csvrdf.pl new file mode 100644 index 0000000..676e684 --- /dev/null +++ b/lib/csvrdf.pl @@ -0,0 +1,100 @@ +:- module(csvrdf, + [load_csv_as_rdf/2, + flush_table_map/1 + ]). + +:- use_module(library(csv)). +:- use_module(library(semweb/rdf_db)). + +:- dynamic + table_map/3. + + +%% load_csv_as_rdf(+Input, +Options) +% +% +% * graph(+Graph) +% RDF Graph for storing the output. Default is =data= +% +% * prefix(+Prefix) +% Create a URI from the column header name by putting Prefix +% in front of it. Also used for the class. If no prefix is +% defined the name in the column header is used. +% +% * class(+ClassName) +% ClassName given to each row +% +% * see options for csv/2 + +load_csv_as_rdf(File, Options) :- + flush_table_map(File), + option(graph(Graph), Options, data), + option(prefix(Prefix), Options, _), + csv_read_file(File, [Header|Content], Options), + length(Content, Length), + debug(csvrdf, '~w rows', [Length]), + create_table_map(Header, Prefix, File), + class_name(Prefix, Class, Options), + write_rows(Content, Class, File, Graph). + +write_rows([], _, _, _). +write_rows([Row|Rows], Class, Table, Graph) :- + Row =.. [_|Cs], + rdf_bnode(BNode), + ( nonvar(Class) + -> rdf_assert(BNode, rdf:type, Class, Graph) + ; true + ), + write_props(Cs, 0, BNode, Table, Graph), + write_rows(Rows, Class, Table, Graph). + +write_props([], _, _, _, _). +write_props([V|Vs], N, S, Table, Graph) :- + N1 is N+1, + ( V = '' + -> true + ; table_map(Table, N1, P) + -> rdf_assert(S, P, literal(V), Graph) + ; true + ), + write_props(Vs, N1, S, Table, Graph). + +%% create_table_map(+Header, +TableMapId) +% +% + +create_table_map(Header, Prefix, Table) :- + Header =.. [_|Cols], + create_table_map_(Cols, 0, Prefix, Table). + +create_table_map_([], _, _, _). +create_table_map_([C|Cs], N, Prefix, Table) :- + N1 is N + 1, + downcase_atom(C, PName), + ( var(Prefix) + -> P = PName + ; atom_concat(Prefix, PName, P) + ), + assert(table_map(Table, N1, P)), + create_table_map_(Cs, N1, Prefix, Table). + + +%% class_name(+Prefix, -Class, Options) + +class_name(Prefix, Class, Options) :- + option(class(ClassName), Options), + ( var(Prefix) + -> Class = ClassName + ; atom_concat(Prefix, ClassName, Class) + ). +class_name(_, _Class, _). + + +capitalise(X, Y) :- + atom_codes(X, [H0|T]), + code_type(H0, to_lower(H)), + atom_codes(Y, [H|T]). + + +flush_table_map(Table) :- + retractall(table_map(Table, _, _)).