amalgame/commit

FIXED: snowball_label_match rdf11

author Jacco van Ossenbruggen
Sun Mar 1 13:53:28 2020 +0100
committer Jacco van Ossenbruggen
Sun Mar 1 13:53:28 2020 +0100
commit c65dc6912438755868f3a78d6640519cf734728f
tree 7e0fab4735b33662eaed7c711a9df952664c899f
parent d4569b7b97adec5c3f883295b71b157f5d530579
Diff style: patch stat
diff --git a/lib/ag_modules/snowball_label_match.pl b/lib/ag_modules/snowball_label_match.pl
index d532369..7a7d5db 100644
--- a/lib/ag_modules/snowball_label_match.pl
+++ b/lib/ag_modules/snowball_label_match.pl
@@ -4,7 +4,7 @@
 :- use_module(library(debug)).
 :- use_module(library(option)).
 :- use_module(library(snowball)).
-:- use_module(library(semweb/rdf_db)).
+:- use_module(library(semweb/rdf11)).
 :- use_module(library(amalgame/lit_distance)).
 :- use_module(library(amalgame/vocabulary)).
 :- use_module(string_match_util).
@@ -14,23 +14,25 @@ snowball_label_match(align(Source, Target, Prov0),
 	rdf_equal(amalgame:label,DefaultP),
 	option(snowball_language(Snowball_Language), Options, english),
 	option(prefix(PrefixLength), Options, 4),
-	option(sourcelabel(MatchProp1), Options, DefaultP),
-	option(targetlabel(MatchProp2), Options, DefaultP),
+	option(sourcelabel(MatchPropS), Options, DefaultP),
+	option(targetlabel(MatchPropT), Options, DefaultP),
 	option(matchacross_lang(MatchAcross), Options, true),
 	option(language(Lang),Options, any),
 	option(edit_distance(Edit_Distance), Options, 0),
 
 	(   Lang == 'any'
-	->  SourceLang = _
+	->  SourceLang = _Unbound
 	;   SourceLang = Lang
 	),
+	skos_has(Source, MatchPropS, SourceLabel@SourceLang, SourceProp, Options),
+	SourceLabel \= '',
+
 	% If we can't match across languages, set target language to source language
 	(   MatchAcross == false
 	->  TargetLang = SourceLang
 	;   true
 	),
 
-	skos_has(Source, MatchProp1, literal(lang(SourceLang, SourceLabel)), SourceProp, Options),
 
 	(   sub_atom(SourceLabel, 0, PrefixLength, _, Prefix)
 	->  true
@@ -43,11 +45,12 @@ snowball_label_match(align(Source, Target, Prov0),
 	% This should be replaced by hash lookup on preprocessed stem table FIXME
 	% Current implementation can miss stemmed matches because the prefixes of the unstemmed labels do not match
 
-	% backtrack over all candidates with prefix match:
-	skos_has(Target, MatchProp2, literal(prefix(Prefix), lang(_TargetLang, _TargetLabel)), _TargetProp, Options),
+	% backtrack over all target candidates with prefix match:
+	rdf11:{ prefix(PrefixLabel, Prefix) },
+	skos_has(Target, MatchPropT, PrefixLabel@TargetLang, _TargetProp, Options),
 
 	% backtrack over all labels of the current target candidate:
-	skos_has(Target, MatchProp2, literal(lang(TargetLang, TargetLabel)), TargetProp, Options),
+	skos_has(Target, MatchPropT, TargetLabel@TargetLang, TargetProp, Options),
 	(   option(target_scheme(TargetScheme), Options)
 	->  vocab_member(Target, TargetScheme)
 	;   true
@@ -66,7 +69,7 @@ snowball_label_match(align(Source, Target, Prov0),
 		source_stem(SourceStem),
 		target_stem(TargetStem),
 		match(Match),
-		graph([rdf(Source, SourceProp, literal(lang(SourceLang, SourceLabel))),
-		       rdf(Target, TargetProp, literal(lang(TargetLang, TargetLabel)))])
+		graph([rdf(Source, SourceProp, SourceLabel@SourceLang),
+		       rdf(Target, TargetProp, TargetLabel@TargetLang)])
 	       ],
 	debug(align_result, 'snowball match: ~p ~p', [Source,Target]).