amalgame/commit

IMPROVED: split prefix-based target candidate generation from label generation in the snowball stemmer matcher. This should be fixed properly with a stem lookup table in the future.

author Jacco van Ossenbruggen
Mon Jul 22 14:39:32 2019 +0200
committer Jacco van Ossenbruggen
Mon Jul 22 14:39:32 2019 +0200
commit 33e1732f05b3ca4cfb11d7583166e2d6d2b24652
tree 9898cc242719b28c983f141631021a63bd6dd244
parent 47169ac584adbe229e287094247ddce06cfb18b5
Diff style: patch stat
diff --git a/lib/ag_modules/snowball_label_match.pl b/lib/ag_modules/snowball_label_match.pl
index f427d7f..dc9a503 100644
--- a/lib/ag_modules/snowball_label_match.pl
+++ b/lib/ag_modules/snowball_label_match.pl
@@ -39,7 +39,15 @@ snowball_label_match(align(Source, Target, Prov0),
 	downcase_atom(SourceLabel, SourceLabel0),
 	snowball(Snowball_Language, SourceLabel0, SourceStem),
 
-	skos_has(Target, MatchProp2, literal(prefix(Prefix), lang(TargetLang, TargetLabel)), TargetProp, Options),
+	% Target candidate generation based on prefixes...
+	% FIXME: this should be replaced by a hash lookup on a preprocessed stem table.
+	% The current implementation can miss stemmed matches because the prefixes of the unstemmed labels do not match.
+
+	% backtrack over all candidates with prefix match:
+	skos_has(Target, MatchProp2, literal(prefix(Prefix), lang(_TargetLang, _TargetLabel)), _TargetProp, Options),
+
+	% backtrack over all labels of the current target candidate:
+	skos_has(Target, MatchProp2, literal(lang(TargetLang, TargetLabel)), TargetProp, Options),
 	(   option(target_scheme(TargetScheme), Options)
 	->  vocab_member(Target, TargetScheme)
 	;   true