:- module(drug_spell_check,
	  [correct_drug_names/0
	  ]).


:- use_module(library(aspell)).
:- use_module(library(semweb/rdf_db)).
:- use_module(library(semweb/rdf_label)).
:- use_module(library(semweb/rdf_litindex)).

% Register the file-search alias `dict`, so that specifications of the
% form dict(...) are looked up under '../dict'.
% NOTE(review): '../dict' is a relative path, so it presumably resolves
% against the working directory at lookup time rather than against the
% location of this source file -- confirm this is intended.
user:file_search_path(dict, '../dict').

%!	correct_drug_names
%
%	Spell-check the tokens found in aers:drugname literals and record
%	the corrections in the RDF store.
%
%	Steps:
%	  1. Start an aspell process that uses the `drugbank` master
%	     dictionary from the directory behind the `dict` alias.
%	  2. Collect all candidate tokens (see drug_name_token/1) and
%	     remove duplicates with sort/2.
%	  3. Ask aspell for suggestions (see spell_check/3).
%	  4. Assert each Token-Suggestion pair (see assert_suggestion/1).
%
%	Progress counts are reported on the debug topic `drugcorrect`.
%
%	@error	existence_error if the dictionary directory cannot be
%		found or read.

correct_drug_names :-
	% Require an existing, readable directory: without these options
	% a wrong working directory would only surface later as an
	% obscure failure inside aspell.
	absolute_file_name(dict(.), DictDir,
			   [ file_type(directory),
			     access(read)
			   ]),
	atom_concat('--dict-dir=', DictDir, DictOpt),
	% NOTE(review): the aspell process is never shut down in this
	% predicate; check whether library(aspell) offers a way to
	% close it once the run is complete.
	create_aspell_process([DictOpt,
			       '--master=drugbank'],
			      PID),
	findall(T, drug_name_token(T), DrugTokens0),
	% sort/2 also removes duplicates, so each token is checked once.
	sort(DrugTokens0, DrugTokens),
	length(DrugTokens, TokenCount),
	debug(drugcorrect, 'tokens: ~w', [TokenCount]),
	spell_check(DrugTokens, PID, Suggestions),
	length(Suggestions, SuggestCount),
	debug(drugcorrect, '~w corrected', [SuggestCount]),
	maplist(assert_suggestion, Suggestions).

%!	drug_name_token(-Token)
%
%	Enumerate, on backtracking, the tokens of every aers:drugname
%	literal that are worth spell-checking. A token qualifies when it
%	is at least four characters long, is not a number and is not
%	already recognised by drug_name/2. The same token may be
%	returned more than once.

drug_name_token(Token) :-
	rdf(_, aers:drugname, Literal),
	literal_text(Literal, Text),
	tokenize_atom(Text, Tokens),
	member(Token, Tokens),
	atom_length(Token, TokenLength),
	TokenLength >= 4,		% skip short tokens
	\+ number(Token),		% tokenize_atom/2 may yield numbers
	\+ drug_name(Token, _).		% already a known drug name

%!	spell_check(+Tokens, +PID, -Corrections)
%
%	Corrections is a list of Token-Suggestion pairs, one for each
%	element of Tokens for which the aspell process PID returns a
%	suggestion that drug_name/2 accepts. The first accepted
%	suggestion, in the order given by aspell/3, is used. Tokens
%	without an accepted suggestion are left out of Corrections.
%
%	The output list is unified only after the choice for a token has
%	been committed, so the predicate also gives the correct answer
%	when Corrections is already (partly) instantiated.

spell_check([], _, []).
spell_check([Token|Tokens], PID, Corrections) :-
	(   aspell(PID, Token, Candidates),
	    member(Candidate, Candidates),
	    drug_name(Candidate, _)
	->  Corrections = [Token-Candidate|Rest]
	    %debug(drugcorrect, '~w -> ~w', [Token,Candidate])
	;   Corrections = Rest
	),
	spell_check(Tokens, PID, Rest).

%!	assert_suggestion(+Pair)
%
%	Pair is a Token-Correction pair. Look up the literals that
%	rdf_find_literals/2 returns for case(Token) and, for every
%	resource that has one of them as a plain aers:drugname literal,
%	assert an aers:drugname_corrected triple holding Correction in
%	the graph `corrected_drugnames`.

assert_suggestion(Token-Correction) :-
	rdf_find_literals(case(Token), Texts),
	forall(drugname_subject_among(Texts, Subject),
	       rdf_assert(Subject,
			  aers:drugname_corrected,
			  literal(Correction),
			  corrected_drugnames)).

%	drugname_subject_among(+Texts, -Subject)
%
%	Subject has an aers:drugname whose plain literal value is a
%	member of Texts. Nondeterministic.

drugname_subject_among(Texts, Subject) :-
	member(Text, Texts),
	rdf(Subject, aers:drugname, literal(Text)).

%!	drug_name(+Label, -Drug)
%
%	Drug is a DrugBank resource named Label. The lookups are tried
%	in order and the first kind that matches wins:
%
%	  1. generic name of a resource typed as drugbank/drugs
%	  2. synonym
%	  3. brand name
%
%	The first two commit to their first solution; the brand-name
%	lookup can yield several resources on backtracking. Labels are
%	matched with the exact(Label) literal query of rdf/3 (documented
%	in rdf_db as a case-insensitive match).

drug_name(Label, Drug) :-
	(   rdf(Drug, drugbank:'drugbank/genericName', literal(exact(Label), _)),
	    rdf(Drug, rdf:type, drugbank:'drugbank/drugs')
	->  true
	;   rdf(Drug, drugbank:'drugbank/synonym', literal(exact(Label), _))
	->  true
	;   rdf(Drug, drugbank:'drugbank/brandName', literal(exact(Label), _))
	).