amalgame/commit
FIXED: snowball_label_match rdf11
| author | Jacco van Ossenbruggen | Sun Mar 1 13:53:28 2020 +0100 |
|---|---|---|
| committer | Jacco van Ossenbruggen | Sun Mar 1 13:53:28 2020 +0100 |
| commit | c65dc6912438755868f3a78d6640519cf734728f | |
| tree | 7e0fab4735b33662eaed7c711a9df952664c899f | |
| parent | d4569b7b97adec5c3f883295b71b157f5d530579 | |
Diff style: patch stat
diff --git a/lib/ag_modules/snowball_label_match.pl b/lib/ag_modules/snowball_label_match.pl index d532369..7a7d5db 100644 --- a/lib/ag_modules/snowball_label_match.pl +++ b/lib/ag_modules/snowball_label_match.pl @@ -4,7 +4,7 @@ :- use_module(library(debug)). :- use_module(library(option)). :- use_module(library(snowball)). -:- use_module(library(semweb/rdf_db)). +:- use_module(library(semweb/rdf11)). :- use_module(library(amalgame/lit_distance)). :- use_module(library(amalgame/vocabulary)). :- use_module(string_match_util). @@ -14,23 +14,25 @@ snowball_label_match(align(Source, Target, Prov0), rdf_equal(amalgame:label,DefaultP), option(snowball_language(Snowball_Language), Options, english), option(prefix(PrefixLength), Options, 4), - option(sourcelabel(MatchProp1), Options, DefaultP), - option(targetlabel(MatchProp2), Options, DefaultP), + option(sourcelabel(MatchPropS), Options, DefaultP), + option(targetlabel(MatchPropT), Options, DefaultP), option(matchacross_lang(MatchAcross), Options, true), option(language(Lang),Options, any), option(edit_distance(Edit_Distance), Options, 0), ( Lang == 'any' - -> SourceLang = _ + -> SourceLang = _Unbound ; SourceLang = Lang ), + skos_has(Source, MatchPropS, SourceLabel@SourceLang, SourceProp, Options), + SourceLabel \= '', + % If we can't match across languages, set target language to source language ( MatchAcross == false -> TargetLang = SourceLang ; true ), - skos_has(Source, MatchProp1, literal(lang(SourceLang, SourceLabel)), SourceProp, Options), ( sub_atom(SourceLabel, 0, PrefixLength, _, Prefix) -> true @@ -43,11 +45,12 @@ snowball_label_match(align(Source, Target, Prov0), % This should be replaced by hash lookup on preprocessed stem table FIXME % Current implementation can miss stemmed matches because the prefix of the unstemmed labes do not match - % backtrack over all candidates with prefix match: - skos_has(Target, MatchProp2, literal(prefix(Prefix), lang(_TargetLang, _TargetLabel)), _TargetProp, Options), + % 
backtrack over all target candidates with prefix match: + rdf11:{ prefix(PrefixLabel, Prefix) }, + skos_has(Target, MatchPropT, PrefixLabel@TargetLang, _TargetProp, Options), % backtrack over all labels of the current target candidate: - skos_has(Target, MatchProp2, literal(lang(TargetLang, TargetLabel)), TargetProp, Options), + skos_has(Target, MatchPropT, TargetLabel@TargetLang, TargetProp, Options), ( option(target_scheme(TargetScheme), Options) -> vocab_member(Target, TargetScheme) ; true @@ -66,7 +69,7 @@ snowball_label_match(align(Source, Target, Prov0), source_stem(SourceStem), target_stem(TargetStem), match(Match), - graph([rdf(Source, SourceProp, literal(lang(SourceLang, SourceLabel))), - rdf(Target, TargetProp, literal(lang(TargetLang, TargetLabel)))]) + graph([rdf(Source, SourceProp, SourceLabel@SourceLang), + rdf(Target, TargetProp, TargetLabel@TargetLabel)]) ], debug(align_result, 'snowball match: ~p ~p', [Source,Target]).