aers_rewrite/commit
cleanup the records, mainly removing blank nodes
author | CWI eculture 2 |
---|---|
Tue Jan 22 10:31:31 2013 +0100 | |
committer | CWI eculture 2 |
Tue Jan 22 10:31:31 2013 +0100 | |
commit | b934224aa2ca0870b761f09a9fcfbae59012e335 |
tree | dba5e57658e41cc9661bfbdf78fb1fb11a14b196 |
parent | 22427e98f0c9b27843cadf9d27e1fbaa71b0037f |
Diff style: patch stat
diff --git a/config-available/aers_rewrite.pl b/config-available/aers_rewrite.pl index 0732b0a..6511182 100644 --- a/config-available/aers_rewrite.pl +++ b/config-available/aers_rewrite.pl @@ -6,5 +6,6 @@ :- use_module(library(semweb/rdf_db)). :- rdf_register_ns(aers, 'http://aers.data2semantics.org/vocab/'). +:- rdf_register_ns(aers_r, 'http://aers.data2semantics.org/report/'). :- use_module(library(aers_rewrite)). diff --git a/lib/aers_rewrite.pl b/lib/aers_rewrite.pl index 1eaede5..e5aaa0d 100644 --- a/lib/aers_rewrite.pl +++ b/lib/aers_rewrite.pl @@ -1,11 +1,14 @@ :- module(aers_rewrite, - [test/0, + [convert/0, + convert/2, + test/0, load/0, - load_year/1, - rewrite/0 + load_year/2, + rewrite_graph/1 ]). :- use_module(library(semweb/rdf_db)). +:- use_module(library(semweb/rdf_turtle_write)). :- use_module(library(xmlrdf/xmlrdf)). :- use_module(library(xmlrdf/rdf_rewrite)). :- use_module(library(xmlrdf/rdf_convert_util)). @@ -13,61 +16,218 @@ :- rdf_register_ns(aers, 'http://aers.data2semantics.org/vocab/'). user:file_search_path(data, '../fda_data'). +user:file_search_path(aers_rdf, '../rdf/aers'). + +:- set_prolog_stack(global, limit(2*10**9)). + +year('04'). +year('05'). +year('06'). +year('07'). +year('08'). +year('09'). + +:- debug(convert). +:- debug(rdf_rewrite). + +convert :- + ( year(Year), + atom_concat('aers_20', Year, Graph), + convert(Year, Graph), + fail + ; true + ). + +convert(Year, Graph) :- + debug(convert, 'convert ~w to ~w', [Year, Graph]), + atom_concat(Graph, '.ttl', FileName), + absolute_file_name(aers_rdf(FileName), File), + load_year(Year, Graph), + debug(convert, 'rewrite ~w', [Graph]), + rewrite_graph(Graph), + debug(convert, 'save ~w', [Graph]), + rdf_save_turtle(File, [graph(Graph)]), + rdf_retractall(_,_,_,Graph). test :- absolute_file_name(data('Adr_test.sgm'), TestFile), - load(TestFile). + load(test, TestFile). load :- absolute_file_name(data(sgml), Dir, [ file_type(directory) ]), + Graph = data, atom_concat(Dir, '/*.SGM', Pattern), expand_file_name(Pattern, Files), - maplist(load, Files). + maplist(load(Graph), Files). -load_year(Year) :- +load_year(Year, Graph) :- absolute_file_name(data(sgml), Dir, [ file_type(directory) ]), concat_atom([Dir, '/ADR',Year,'*.SGM'], Pattern), expand_file_name(Pattern, Files), - maplist(load, Files). + maplist(load(Graph), Files). -load(File) :- +load(Graph, File) :- rdf_current_ns(aers, Prefix), load_xml_as_rdf(File, [ dialect(xml), unit(safetyreport), - prefix(Prefix) + prefix(Prefix), + graph(Graph) ]). -rewrite :- - rdf_rewrite(data). +rewrite_graph(Graph) :- + rdf_rewrite(Graph). +% report +% % URIs of the reports are based on the safetyreportid assign_uris @@ -{S, aers:safetyreportid, literal(Id)}\ -{ S } <=> - literal_to_id(['report', Id], aers, URI), - { URI }. +{S, aers:safetyreportid, Id}\ +{S} +<=> +literal_to_id(Id, aers_r, URI), +{URI}. -% Reactions are direct properties of the report and not the patient +% reaction +% +% Reactions are direct properties of the report reaction @@ -{Patient, aers:reaction, Reaction}, {Report, aers:patient, Patient}\ -{Patient, aers:reaction, Reaction} +{Patient, aers:reaction, BNode}, +{BNode, aers:reactionmeddrapt, Reaction}, +{BNode, _, _} <=> { Report, aers:reaction, Reaction }. -% Drugs are direct properties of the report and not the patient +% drug +% +% Drugs are direct properties of the report +% characterization codes are URIs drug @@ -{Patient, aers:drug, Drug}, {Report, aers:patient, Patient}\ -{Patient, aers:drug, Drug} +{Patient, aers:drug, Drug}, +{Drug, aers:drugcharacterization, literal(C)} +<=> +drug_characterization_uri(C, URI), +{ Report, aers:drug, Drug }, +{ Drug, aers:drugcharacterization, URI}. + +% Patient +% +% deathDate is a direct property of the patient +% gender has a URI + +patient_death @@ +{Patient, aers:patientdeath, BNode}, +{BNode, aers:patientdeathdate, Date}, +{BNode, _, _} +<=> +{Patient, aers:patientdeathdate, Date}. + +patient_gender @@ +{Patient, aers:patientsex, literal(C)} +<=> +gender_uri(C, URI), +{Patient, aers:patientsex, URI}. + +outcome @@ +{Report, P, literal('1')} +<=> +outcome_uri(P, Outcome), +{Report, aers:outcome, Outcome}. + +outcome @@ +{_, aers:serious, literal('1')} <=> -{ Report, aers:drug, Drug }. +true. + +% sender +% +% Sender is a direct property + +sender @@ +{Record, aers:sender, BNode}, +{BNode, aers:senderorganization, Organization}, +{BNode, _, _} +<=> +{Record, aers:sender, Organization}. + +% Source +% +% qualification code and code are direct properties of +% the record +% qualification code has a URI + +source @@ +{Report, aers:primarysource, Source}, +{Source, aers:qualification, literal(Q)}, +{Source, aers:reportercountry, Country} ?, +{Source, _, _} +<=> +source_qualification_uri(Q, Qualification), +{Report, aers:sourceQualification, Qualification}, +{Report, aers:sourceCountry, Country}. + +% meaningless triples +% +% date format is always 102, and thus not needed +% we don't need empty literals + +format_properties @@ +{_, P, _} +<=> +format_property(P). + +empty_literals @@ +{_,_,literal('')} +<=> +true. + + +format_property(P) :- rdf_equal(P, aers:drugenddateformat). +format_property(P) :- rdf_equal(P, aers:drugstartdateformat). +format_property(P) :- rdf_equal(P, aers:patientdeathdateformat). +format_property(P) :- rdf_equal(P, aers:receiptdateformat). +format_property(P) :- rdf_equal(P, aers:receivedateformat). +format_property(P) :- rdf_equal(P, aers:transmissiondateformat). +%format_property(P) :- rdf_equal(aers:patientonsetageunit, P). + +source_qualification_uri('1', C) :- !, rdf_equal(C, aers:'source/physician'). +source_qualification_uri('2', C) :- !, rdf_equal(C, aers:'source/pharmacist'). +source_qualification_uri('3', C) :- !, rdf_equal(C, aers:'source/healthProfessional'). +source_qualification_uri('4', C) :- !, rdf_equal(C, aers:'source/lawyer'). +source_qualification_uri('5', C) :- rdf_equal(C, aers:'source/nonHealthProfessional'). + +gender_uri('1', C) :- !, rdf_equal(C, aers:'gender/male'). +gender_uri('2', C) :- rdf_equal(C, aers:'gender/female'). + +drug_characterization_uri('1', C) :- !, rdf_equal(C, aers:'drug/role/suspect'). +drug_characterization_uri('2', C) :- !, rdf_equal(C, aers:'drug/role/concomitant'). +drug_characterization_uri('3', C) :- rdf_equal(C, aers:'drug/role/interacting'). + +outcome_uri(P, C) :- + rdf_equal(P, aers:seriousnessdeath),!, + rdf_equal(C, aers:'outcome/death'). +outcome_uri(P, C) :- + rdf_equal(P, aers:seriousnesslifethreatening),!, + rdf_equal(C, aers:'outcome/lifethreatening'). +outcome_uri(P, C) :- + rdf_equal(P, aers:seriousnesshospitalization),!, + rdf_equal(C, aers:'outcome/hospitalization'). +outcome_uri(P, C) :- + rdf_equal(P, aers:seriousnessdisabling),!, + rdf_equal(C, aers:'outcome/disabling'). +outcome_uri(P, C) :- + rdf_equal(P, aers:seriousnesscongenitalanomaly),!, + rdf_equal(C, aers:'outcome/congenitalanomaly'). +outcome_uri(P, C) :- + rdf_equal(P, aers:seriousnessother), + rdf_equal(C, aers:'outcome/other').