virgil/commit

ADD drug name normalisation

author Michiel Hildebrand
Tue Mar 26 16:25:15 2013 +0100
committer Michiel Hildebrand
Tue Mar 26 16:25:15 2013 +0100
commit cf8e9ac2662a68b5905de9da58303fed25a911b7
tree 1f4c9a64e377c302d08040c7454e7057a7eb2b6b
parent 2fcdbb59105d77b0d1e9b08fec13bc51daa00259
Diff style: patch stat
diff --git a/api/drugs.pl b/api/drugs.pl
index b3d575f..4e6737e 100644
--- a/api/drugs.pl
+++ b/api/drugs.pl
@@ -7,6 +7,7 @@
 :- use_module(library(http/http_json)).
 :- use_module(library(semweb/rdf_litindex)).
 :- use_module(library(semweb/rdf_label)).
+:- use_module(library(drug_normalise)).
 
 :- http_handler(cliopatria(aers/api/drug/mentions), http_drug_mentions, []).
 :- http_handler(cliopatria(aers/api/drug/brands), http_drug_brands, []).
@@ -37,10 +38,11 @@ drug_mentions(Q, Method, Mentions) :-
 	keysort(Mentions0, Mentions1),
 	reverse(Mentions1, Mentions).
 
-drug_mention(exact, Q, Literal, Report) :-
-	rdf(DrugUse, aers:drugname, literal(exact(Q), Literal)),
-	rdf(Report, aers:drug, DrugUse).
-drug_mention(Method, Q, Lit, Report) :-
+drug_mention(exact, Q, Normalised_Lit, Report) :-
+	rdf(DrugUse, aers:drugname, literal(exact(Q), Lit)),
+	rdf(Report, aers:drug, DrugUse),
+	drug_normalise(Lit, Normalised_Lit).
+drug_mention(Method, Q, Normalised_Lit, Report) :-
 	(   Method = word
 	->  Query = case(Q)
 	;   Method = stem
@@ -59,6 +61,7 @@ drug_mention(Method, Q, Lit, Report) :-
 	;   rdf(DrugUse, aers:drugname, literal(L)),
 	    Lit = L
 	),
+	drug_normalise(Lit, Normalised_Lit),
 	rdf(Report, aers:drug, DrugUse).
 
 drug_list_mentions([], _, []).
diff --git a/lib/drug_normalise.pl b/lib/drug_normalise.pl
new file mode 100644
index 0000000..b7576d8
--- /dev/null
+++ b/lib/drug_normalise.pl
@@ -0,0 +1,30 @@
+:- module(drug_normalise,
+	  [drug_normalise/2
+	  ]).
+
+drug_normalise(DrugName, Normalised) :-
+	tokenize_atom(DrugName, Tokens),
+	list_to_set(Tokens, TokenSet0),
+	convert(TokenSet0, TokenSet1),
+	exclude(punct, TokenSet1, TokenSet),
+	atomic_list_concat(TokenSet, ' ', Normalised).
+
+convert([], []).
+convert([N0|T], [N|Rest]) :-
+	number(N0),
+	!,
+	N is abs(N0),
+	convert(T, Rest).
+convert(['MG','/','M2'|T], ['MG'|Rest]) :-
+	!,
+	convert(T, Rest).
+convert([H|T], [H|Rest]) :-
+	convert(T, Rest).
+
+punct('!').
+punct('.').
+punct(',').
+punct('-').
+punct('_').
+punct(')').
+punct('(').