amalgame/commit
IMPROVED: split prefix-based target candidate generation from label generation in the snowball stemmer. This is a stopgap; a proper fix requires a preprocessed stem lookup table in the future.
author | Jacco van Ossenbruggen |
---|---|
Mon Jul 22 14:39:32 2019 +0200 | |
committer | Jacco van Ossenbruggen |
Mon Jul 22 14:39:32 2019 +0200 | |
commit | 33e1732f05b3ca4cfb11d7583166e2d6d2b24652 |
tree | 9898cc242719b28c983f141631021a63bd6dd244 |
parent | 47169ac584adbe229e287094247ddce06cfb18b5 |
Diff style: patch stat
diff --git a/lib/ag_modules/snowball_label_match.pl b/lib/ag_modules/snowball_label_match.pl index f427d7f..dc9a503 100644 --- a/lib/ag_modules/snowball_label_match.pl +++ b/lib/ag_modules/snowball_label_match.pl @@ -39,7 +39,15 @@ snowball_label_match(align(Source, Target, Prov0), downcase_atom(SourceLabel, SourceLabel0), snowball(Snowball_Language, SourceLabel0, SourceStem), - skos_has(Target, MatchProp2, literal(prefix(Prefix), lang(TargetLang, TargetLabel)), TargetProp, Options), + % Target candidate generation based on prefixes... + % This should be replaced by hash lookup on preprocessed stem table FIXME + % Current implementation can miss stemmed matches because the prefix of the unstemmed labes do not match + + % backtrack over all candidates with prefix match: + skos_has(Target, MatchProp2, literal(prefix(Prefix), lang(_TargetLang, _TargetLabel)), _TargetProp, Options), + + % backtrack over all labels of the current target candidate: + skos_has(Target, MatchProp2, literal(lang(TargetLang, TargetLabel)), TargetProp, Options), ( option(target_scheme(TargetScheme), Options) -> vocab_member(Target, TargetScheme) ; true