virgil/commit
CLEANUP drug spell checking
author | Michiel Hildebrand |
---|---|
Tue Mar 26 15:08:47 2013 +0100 | |
committer | Michiel Hildebrand |
Tue Mar 26 15:08:47 2013 +0100 | |
commit | 99baf84e955bb5f2110fe800376a3be1ea7260d9 |
tree | 0b7733f9b0a51ea206f19d5dd18632146c4a60a7 |
parent | 7c82a4e6fec74968462d0ce2d2432c3ec153a08a |
Diff style: patch stat
diff --git a/lib/drug_spell_check.pl b/lib/drug_spell_check.pl index 974d84b..245c293 100644 --- a/lib/drug_spell_check.pl +++ b/lib/drug_spell_check.pl @@ -15,23 +15,24 @@ correct_drug_names :- create_aspell_process([DictOpt, '--master=drugbank'], PID), - findall(Lit, rdf(_,aers:drugname,Lit), Drugs0), - sort(Drugs0, Drugs), - setof(T, ( member(D,Drugs), - drug_token(D, T) - ), - DrugTokens), + findall(T, drug_name_token(T), DrugTokens0), + sort(DrugTokens0, DrugTokens), + length(DrugTokens, TokenCount), + debug(drugcorrect, 'tokens: ~w', [TokenCount]), spell_check(DrugTokens, PID, Suggestions), length(Suggestions, SuggestCount), debug(drugcorrect, '~w corrected', [SuggestCount]), maplist(assert_suggestion, Suggestions). -drug_token(Lit, A) :- +drug_name_token(A) :- + rdf(_,aers:drugname,Lit), literal_text(Lit, H), tokenize_atom(H, As), member(A, As), atom_length(A, Length), - Length > 2. + Length > 3, + \+ number(A), + \+ drug_name(A, _). spell_check([], _, []). spell_check([H|T], PID, [H-Suggestion|Rest]) :- @@ -48,26 +49,14 @@ assert_suggestion(H-Suggestion) :- rdf_find_literals(H, Literals), forall((member(Lit, Literals), rdf(R,aers:drugname,literal(Lit))), - rdf_assert(R,aers:drugname_corrected,literal(Suggestion))). - - - -in_drugbank(Q) :- - tokenize_atom(Q,DL), - member(Word,DL), - find_drug_by_name(Word, _). - -find_drug_by_name(Q, Drug) :- - rdf_find_literals(case(Q), Literals), - member(Lit, Literals), - drug_name(Lit, Drug). + rdf_assert(R,aers:drugname_corrected,literal(Suggestion),corrected_drugnames)). drug_name(L, Drug) :- - rdf(Drug,rdfs:label,literal(L)), + rdf(Drug,drugbank:'drugbank/genericName',literal(exact(L), _)), rdf(Drug,rdf:type,drugbank:'drugbank/drugs'), !. drug_name(L, Drug) :- - rdf(Drug,drugbank:'drugbank/synonym',literal(L)), + rdf(Drug,drugbank:'drugbank/synonym',literal(exact(L),_)), !. drug_name(L, Drug) :- - rdf(Drug,drugbank:'drugbank/brandName',literal(L)). + rdf(Drug,drugbank:'drugbank/brandName',literal(exact(L),_)).