virgil/commit
TRY token based spelling correction
author | Michiel Hildebrand |
---|---|
Tue Mar 26 14:44:05 2013 +0100 | |
committer | Michiel Hildebrand |
Tue Mar 26 14:44:05 2013 +0100 | |
commit | 7c82a4e6fec74968462d0ce2d2432c3ec153a08a |
tree | 85c34e20787d7d6b5805d82962542104283323d3 |
parent | ed5459424d92645c3368834dddfb54db70309933 |
Diff style: patch stat
diff --git a/lib/drug_spell_check.pl b/lib/drug_spell_check.pl index 65226a9..974d84b 100644 --- a/lib/drug_spell_check.pl +++ b/lib/drug_spell_check.pl @@ -17,40 +17,37 @@ correct_drug_names :- PID), findall(Lit, rdf(_,aers:drugname,Lit), Drugs0), sort(Drugs0, Drugs), - cleanup_lit(Drugs, Drugs1), - length(Drugs, UniqueCount), - length(Drugs1, CleanupCount), - debug(drugcorrect, '~w unique drug names', [UniqueCount]), - debug(drugcorrect, '~w after cleanup', [CleanupCount]), - spell_check(Drugs1, PID, Suggestions), + setof(T, ( member(D,Drugs), + drug_token(D, T) + ), + DrugTokens), + spell_check(DrugTokens, PID, Suggestions), length(Suggestions, SuggestCount), debug(drugcorrect, '~w corrected', [SuggestCount]), maplist(assert_suggestion, Suggestions). -cleanup_lit([], []). -cleanup_lit([Lit|T], [A-Lit|Rest]) :- +drug_token(Lit, A) :- literal_text(Lit, H), - atom_length(H, Length), - Length > 2, - tokenize_atom(H, [A0]), - downcase_atom(A0, A), - !, - cleanup_lit(T, Rest). -cleanup_lit([_|T], Rest) :- - cleanup_lit(T, Rest). + tokenize_atom(H, As), + member(A, As), + atom_length(A, Length), + Length > 2. spell_check([], _, []). -spell_check([A-Lit|T], PID, [Lit-Suggestion|Rest]) :- - aspell(PID, A, Suggestions), - Suggestions = [Suggestion|_], % we only keep the first suggestion +spell_check([H|T], PID, [H-Suggestion|Rest]) :- + aspell(PID, H, Suggestions), + member(Suggestion, Suggestions), + drug_name(Suggestion, _), !, %debug(drugcorrect, '~w -> ~w', [A,Suggestion]), spell_check(T, PID, Rest). spell_check([_|T], PID, Rest) :- spell_check(T, PID, Rest). -assert_suggestion(Lit-Suggestion) :- - forall(rdf(R,aers:drugname,Lit), +assert_suggestion(H-Suggestion) :- + rdf_find_literals(H, Literals), + forall((member(Lit, Literals), + rdf(R,aers:drugname,literal(Lit))), rdf_assert(R,aers:drugname_corrected,literal(Suggestion))).