amalgame/commit

FIXED: make compound label matcher behave like the other label modules

authorJacco van Ossenbruggen
Mon Sep 8 21:57:58 2014 +0200
committerJacco van Ossenbruggen
Mon Sep 8 21:57:58 2014 +0200
commit9e12d452509262d0c5761d3b6bf28f31e5478fa8
tree16b1ff08dc07fa57b78338afa55536e71fb83ae1
parent07e760bdbb3de93e5360c9a98ce5b02faee7dcec
Diff style: patch stat
diff --git a/lib/ag_modules/compound_label_generator.pl b/lib/ag_modules/compound_label_generator.pl
index 69672cf..767e549 100644
--- a/lib/ag_modules/compound_label_generator.pl
+++ b/lib/ag_modules/compound_label_generator.pl
@@ -1,7 +1,6 @@
 :- module(compound_label_generator,
 	  []).
 
-:- use_module(library(lists)).
 :- use_module(library(semweb/rdf_db)).
 :- use_module(library(amalgame/vocabulary)).
 :- use_module(string_match_util).
@@ -38,8 +37,7 @@ parameter(match_qualified_only, boolean, false,
 %	and Target.
 
 matcher(Source, Target, Mappings, Options) :-
-	findall(M, align(Source, Target, M, Options), ListOfLists),
-	append(ListOfLists, MappingsUnSorted),
+	findall(M, align(Source, Target, M, Options), MappingsUnSorted),
 	sort(MappingsUnSorted, Mappings).
 
 align(Source, TargetScheme, MatchList, Options) :-
diff --git a/lib/ag_modules/compound_label_match.pl b/lib/ag_modules/compound_label_match.pl
index 4406f76..2c752ac 100644
--- a/lib/ag_modules/compound_label_match.pl
+++ b/lib/ag_modules/compound_label_match.pl
@@ -8,93 +8,72 @@
 :- use_module(library(amalgame/vocabulary)).
 :- use_module(string_match_util).
 
-compound_label_match(align(Source, _Target, Prov0), Results, Options) :-
+%%	compound_label_match(?In, -Out, +Options) is non det.
+%
+%	In and Out are align(S,T,P) terms.
+%	Typically, there are to modes for calling this predicate:
+%
+%       * Both S and T of In are instantiated, and the predicate is
+%	  called to find a match between S and T, or
+%	* Only S is ground and the predicate is called to find a
+%	  matching T.
+compound_label_match(align(Source, Target, Prov0),
+		  align(Source, Target, [Prov|Prov0]), Options) :-
 	rdf_equal(rdfs:label, RdfsLabel),
-	option(sourcelabel(MatchProp1), Options, RdfsLabel),
+	option(sourcelabel(MatchPropS), Options, RdfsLabel),
+	option(targetlabel(MatchPropT), Options, RdfsLabel),
+	option(matchacross_lang(MatchAcross), Options, true),
+	option(matchacross_type(IgnoreType),  Options, true),
+	option(case_sensitive(CaseSensitive), Options, false),
 	option(source_language(Lang), Options, 'any'),
-
 	(   Lang == 'any'
-	->  SourceLang = _
+	->  SourceLang = _UnBound
 	;   SourceLang = Lang
 	),
 
-	Prov = [method(compound_label_match),
-		score(Score),
-		match(Match),
-		graph([rdf(Source, SourceProp,
-			   literal(lang(SourceLang, SourceLabel)))])
-	       ],
-
-	skos_match(Source, MatchProp1,
-		   literal(lang(SourceLang, SourceLabel)), SourceProp, Options),
-	rdf_tokenize_literal(SourceLabel, Tokens),
-	findall(Targets-Token-LabelAmbScore,
-		(   member(Token, Tokens),
-		    atom(Token),
-		    match_label(Source, Token, Targets,
-				[source_lang(SourceLang)|Options]),
-		    length(Targets, LabelAmbScore)
-
-		),
-		Targets),
-	length(Tokens, TokenLength),
-	length(Targets, NrMatched),
-	NrMatched > 0,
-	Match is NrMatched/TokenLength,
-	format(atom(Score), 'Matched ~w out of ~w parts', [NrMatched, TokenLength]),
-	create_results(Targets, Source, Prov0, Prov, Results).
+	% If we cannot match across languages, set target language to source language
+	(   MatchAcross == false
+	->  TargetLang = SourceLang
+	;   true
+	),
 
-create_results([], _, _, _, []).
-create_results([Targets|Tail], Source, OldProv, MatchProv, Results):-
-	create_result_list(Targets, Source, OldProv, MatchProv, Results0),
-	create_results(Tail, Source, OldProv, MatchProv, Results1),
-	append(Results0, Results1, Results).
+	(   CaseSensitive
+	->  SearchTarget=literal(lang(TargetLang, Token))
+	;   SearchTarget=literal(exact(Token), lang(TargetLang, TargetLabel))
+	),
 
-create_result_list([]-_-_, _, _, _, []).
-create_result_list([T-TProv|Tail]-L-Count, Source, OldProv, MatchProv, [A|Results]):-
-	create_result(T-L-TProv-Count, Source, OldProv, MatchProv, A),
-	create_result_list(Tail-L-Count, Source, OldProv, MatchProv, Results).
+	skos_match(Source, MatchPropS,
+		   literal(lang(SourceLang, SourceLabel)),
+		   SourceProp, Options),
+	SourceLabel \= '',
+	rdf_tokenize_literal(SourceLabel, Tokens),
+	length(Tokens, TokenLength), TokenLength > 0,
+	member(Token, Tokens), atom(Token),
+	skos_match(Target, MatchPropT, SearchTarget, TargetProp, Options),
 
-create_result(Target-L-TargetProvGraph-Count, Source, OldProv, MatchProv,
-	      align(Source, Target, [[token(L),token_ambiguity(Count),graph(ProvGraph)|Rest]|OldProv])) :-
-	select_option(graph(Graph), MatchProv, Rest),
-	append(Graph, TargetProvGraph, ProvGraph).
+	(   option(target_scheme(TargetScheme), Options)
+	->  vocab_member(Target, TargetScheme)
+	;   true
+	),
 
-match_label(Source, Label, Targets, Options) :-
-	rdf_equal(rdfs:label, RdfsLabel),
-	option(targetlabel(MatchProp), Options, RdfsLabel),
-	option(matchacross_lang(MatchAcross), Options, true),
-	option(matchacross_type(IgnoreType),  Options, true),
-	option(case_sensitive(CaseSensitive), Options, false),
-	option(source_lang(SourceLang), Options),
+	(   IgnoreType
+	->  true
+	;   matching_types(Source, Target)
+	),
 
-	(   CaseSensitive
-	->  SearchTarget=literal(lang(TargetLang, Label))
-	;   SearchTarget=literal(exact(Label), lang(TargetLang, TargetLabel))
+	% if matching label has no lang tag, these are still not grounded:
+	(   var(SourceLang)
+	->  SourceTerm = literal(SourceLabel)
+	;   SourceTerm = literal(lang(SourceLang, SourceLabel))
 	),
 
-        % If we cannot match across languages, set target language to source language
-	(   MatchAcross == false
-	->  TargetLang = SourceLang
-	;   true
+	(   var(TargetLang)
+	->  TargetTerm = literal(TargetLabel)
+	;   TargetTerm = literal(lang(TargetLang, TargetLabel))
 	),
 
-	findall(Target-ProvGraph,
-		(   skos_match(Target, MatchProp, SearchTarget,
-			       TargetProp, Options),
-		    (	option(target_scheme(TargetScheme), Options)
-		    ->	vocab_member(Target, TargetScheme)
-		    ;	true
-		    ),
-		    (   IgnoreType
-		    ->  true
-		    ;   matching_types(Source, Target)
-		    ),
-		    (   var(TargetLang)
-		    ->  TargetTerm = literal(TargetLabel)
-		    ;   TargetTerm = literal(lang(TargetLang, TargetLabel))
-		    ),
-		    ProvGraph = [rdf(Target, TargetProp, TargetTerm)]
-		),
-		Targets),
-	Targets \= [].
+	Prov = [method(compound_label),
+		score(nr_of_tokens(TokenLength)),
+		graph([rdf(Source, SourceProp, SourceTerm),
+		       rdf(Target, TargetProp, TargetTerm)])
+	       ].
diff --git a/lib/ag_modules/compound_label_selecter.pl b/lib/ag_modules/compound_label_selecter.pl
index 154d950..d4ce0f0 100644
--- a/lib/ag_modules/compound_label_selecter.pl
+++ b/lib/ag_modules/compound_label_selecter.pl
@@ -5,7 +5,6 @@
 :- public selecter/5.
 :- public parameter/4.
 
-:- use_module(library(lists)).
 :- use_module(library(semweb/rdf_db)).
 :- use_module(label_selecter).
 :- use_module(compound_label_match).
@@ -38,5 +37,5 @@ parameter(match_qualified_only, boolean, false,
 amalgame_module(amalgame:'CompoundLabelSelecter').
 
 selecter(In, Sel, Dis, Und, Options) :-
-	label_selecter(compound_label_match, In, SelList, Dis, Und, Options),
-	append(SelList, Sel). % HACK FIX ME
+	label_selecter(compound_label_match, In, Sel, Dis, Und, Options).
+