amalgame/commit

CHANGED: exact_match split into generator and partitioner

authorJacco van Ossenbruggen
Sat Sep 6 11:51:20 2014 +0200
committerJacco van Ossenbruggen
Sat Sep 6 11:51:20 2014 +0200
commit8fe3ed2c1c9744b67da5e8891f907b836999064b
tree2fe7b1f12c58832b73cb8cec9a5f04661d837cd4
parentf21da2c375f960e188a245f4a2fafe66c7c70aba
Diff style: patch stat
diff --git a/config-available/ag_modules.pl b/config-available/ag_modules.pl
index 82542b2..9fb7528 100644
--- a/config-available/ag_modules.pl
+++ b/config-available/ag_modules.pl
@@ -1,8 +1,12 @@
 :- module(conf_ag_modules, []).
 
+% Candidate correspondence generator components:
+:- use_module(library(ag_modules/exact_label_generator)).
+
+% Mapping producing partitioners:
+:- use_module(library(ag_modules/exact_label_selecter)).
 
 % Modules that can be used as matchers
-:- use_module(library(ag_modules/exact_label_match)).
 :- use_module(library(ag_modules/compound_match)).
 :- use_module(library(ag_modules/snowball_match)).
 :- use_module(library(ag_modules/isub_match)).
diff --git a/lib/ag_modules/exact_label_generator.pl b/lib/ag_modules/exact_label_generator.pl
new file mode 100644
index 0000000..caa4bb3
--- /dev/null
+++ b/lib/ag_modules/exact_label_generator.pl
@@ -0,0 +1,63 @@
+:- module(exact_label_generator, []).
+
+:- use_module(library(semweb/rdf_db)).
+:- use_module(library(amalgame/vocabulary)).
+:- use_module(exact_label_match).
+:- use_module(string_match_util).
+
+:- public amalgame_module/1.
+:- public filter/3.
+:- public matcher/4.
+:- public parameter/4.
+
+amalgame_module(amalgame:'ExactLabelMatcher').
+amalgame_module(amalgame:'ExactLabelFilter').
+
+parameter(sourcelabel, oneof(LabelProps), Default,
+	  '(Super)Property to get label of the source by') :-
+	rdf_equal(Default, rdfs:label),
+	label_list(LabelProps).
+parameter(targetlabel, oneof(LabelProps), Default,
+	  '(Super)Property to get the label of the target by') :-
+	rdf_equal(Default, rdfs:label),
+	label_list(LabelProps).
+parameter(source_language, oneof(['any'|L]), 'any',
+	  'Language of source label') :-
+	strategy_languages(_S,L).
+parameter(matchacross_lang, boolean, true,
+	  'Allow labels from different language to be matched').
+parameter(matchacross_type, boolean, true,
+	  'Allow labels from different types to be matched').
+parameter(case_sensitive, boolean, false,
+	  'When true the case of labels must be equal').
+parameter(match_qualified_only, boolean, false,
+	  'Match only on the fully qualified label').
+
+%%	filter(+MappingsIn, -MappingsOut, +Options)
+%
+%	Filter mappings based on exact matching of labels.
+
+filter([], [], _).
+filter([align(S,T,P)|Cs], [C|Mappings], Options) :-
+	(   T = scheme(TargetScheme)
+	->  exact_label_match(align(S,_,P), C, [target_scheme(TargetScheme)|Options])
+	;   exact_label_match(align(S,T,P), C, Options)
+	),
+	!,
+	filter(Cs, Mappings, Options).
+filter([_|Cs], Mappings, Options) :-
+	filter(Cs, Mappings, Options).
+
+
+%%	matcher(+Source, +Target, -Mappings, +Options)
+%
+%	Mappings is a sorted list of matches between instances of Source
+%	and Target.
+
+matcher(Source, Target, Mappings, Options) :-
+	findall(M, align(Source, Target, M, Options), Mappings0),
+	sort(Mappings0, Mappings).
+
+align(Source, Target, Match, Options) :-
+	vocab_member(S, Source),
+	exact_label_match(align(S,_,[]), Match, [target_scheme(Target)|Options]).
diff --git a/lib/ag_modules/exact_label_match.pl b/lib/ag_modules/exact_label_match.pl
index 1a749d8..0a466f3 100644
--- a/lib/ag_modules/exact_label_match.pl
+++ b/lib/ag_modules/exact_label_match.pl
@@ -1,68 +1,13 @@
-:- module(exact_label_match,
-	  []).
+:- module(exact_label_match, [
+	      exact_label_match/3
+	  ]).
 
 :- use_module(library(semweb/rdf_db)).
 :- use_module(library(amalgame/vocabulary)).
 :- use_module(string_match_util).
 
-:- public amalgame_module/1.
-:- public filter/3.
-:- public matcher/4.
-:- public parameter/4.
-
-amalgame_module(amalgame:'ExactLabelMatcher').
-amalgame_module(amalgame:'ExactLabelFilter').
-
-parameter(sourcelabel, oneof(LabelProps), Default,
-	  '(Super)Property to get label of the source by') :-
-	rdf_equal(Default, rdfs:label),
-	label_list(LabelProps).
-parameter(targetlabel, oneof(LabelProps), Default,
-	  '(Super)Property to get the label of the target by') :-
-	rdf_equal(Default, rdfs:label),
-	label_list(LabelProps).
-parameter(source_language, oneof(['any'|L]), 'any',
-	  'Language of source label') :-
-	strategy_languages(_S,L).
-parameter(matchacross_lang, boolean, true,
-	  'Allow labels from different language to be matched').
-parameter(matchacross_type, boolean, true,
-	  'Allow labels from different types to be matched').
-parameter(case_sensitive, boolean, false,
-	  'When true the case of labels must be equal').
-parameter(match_qualified_only, boolean, false,
-	  'Match only on the fully qualified label').
-
-%%	filter(+MappingsIn, -MappingsOut, +Options)
-%
-%	Filter mappings based on exact matching of labels.
-
-filter([], [], _).
-filter([align(S,T,P)|Cs], [C|Mappings], Options) :-
-	(   T = scheme(TargetScheme)
-	->  match(align(S,_,P), C, [target_scheme(TargetScheme)|Options])
-	;   match(align(S,T,P), C, Options)
-	),
-	!,
-	filter(Cs, Mappings, Options).
-filter([_|Cs], Mappings, Options) :-
-	filter(Cs, Mappings, Options).
-
-
-%%	matcher(+Source, +Target, -Mappings, +Options)
-%
-%	Mappings is a sorted list of matches between instances of Source
-%	and Target.
-
-matcher(Source, Target, Mappings, Options) :-
-	findall(M, align(Source, Target, M, Options), Mappings0),
-	sort(Mappings0, Mappings).
-
-align(Source, Target, Match, Options) :-
-	vocab_member(S, Source),
-	match(align(S,_,[]), Match, [target_scheme(Target)|Options]).
-
-match(align(Source, Target, Prov0), align(Source, Target, [Prov|Prov0]), Options) :-
+exact_label_match(align(Source, Target, Prov0),
+		  align(Source, Target, [Prov|Prov0]), Options) :-
 	rdf_equal(rdfs:label, RdfsLabel),
 	option(sourcelabel(MatchPropS), Options, RdfsLabel),
 	option(targetlabel(MatchPropT), Options, RdfsLabel),
@@ -71,7 +16,7 @@ match(align(Source, Target, Prov0), align(Source, Target, [Prov|Prov0]), Options
 	option(case_sensitive(CaseSensitive), Options, false),
 	option(source_language(Lang), Options, 'any'),
 	(   Lang == 'any'
-	->  SourceLang = _
+	->  SourceLang = _UnBound
 	;   SourceLang = Lang
 	),
 
diff --git a/lib/ag_modules/exact_label_selecter.pl b/lib/ag_modules/exact_label_selecter.pl
new file mode 100644
index 0000000..7158fdb
--- /dev/null
+++ b/lib/ag_modules/exact_label_selecter.pl
@@ -0,0 +1,49 @@
+:- module(exact_label_selecter,
+	  []).
+
+:- public amalgame_module/1.
+:- public selecter/5.
+:- public parameter/4.
+
+:- use_module(library(sort)).
+:- use_module(library(amalgame/map)).
+:- use_module(label_selecter).
+:- use_module(exact_label_match).
+:- use_module(string_match_util).
+
+parameter(type,
+	  oneof([source,target, all]), all,
+	 'Select all exact label matches or pick best source/target to disambiguate').
+
+parameter(sourcelabel, oneof(LabelProps), Default,
+	  '(Super)Property to get label of the source by') :-
+	rdf_equal(Default, rdfs:label),
+	label_list(LabelProps).
+parameter(targetlabel, oneof(LabelProps), Default,
+	  '(Super)Property to get the label of the target by') :-
+	rdf_equal(Default, rdfs:label),
+	label_list(LabelProps).
+parameter(source_language, oneof(['any'|L]), 'any',
+	  'Language of source label') :-
+	strategy_languages(_S,L).
+parameter(matchacross_lang, boolean, true,
+	  'Allow labels from different language to be matched').
+parameter(matchacross_type, boolean, true,
+	  'Allow labels from different types to be matched').
+parameter(case_sensitive, boolean, false,
+	  'When true the case of labels must be equal').
+parameter(match_qualified_only, boolean, false,
+	  'Match only on the fully qualified label').
+
+amalgame_module(amalgame:'ExactLabelSelecter').
+
+selecter(In, Sel, Dis, Und, Options) :-
+	option(type(SourceOrTarget), Options, all),
+	(   SourceOrTarget \= source
+	->  label_selecter(SourceOrTarget, exact_label_match, In, Sel, Dis, Und, Options)
+	;   predsort(ag_map:compare_align(target), In, InT),
+	    label_selecter(SourceOrTarget, exact_label_match, InT, Sel0, Dis0, Und0, Options),
+	    predsort(ag_map:compare_align(source), Sel0,  Sel),
+	    predsort(ag_map:compare_align(source), Dis0,  Dis),
+	    predsort(ag_map:compare_align(source), Und0,  Und)
+	).
diff --git a/lib/ag_modules/label_selecter.pl b/lib/ag_modules/label_selecter.pl
new file mode 100644
index 0000000..1a732f3
--- /dev/null
+++ b/lib/ag_modules/label_selecter.pl
@@ -0,0 +1,19 @@
+:- module(label_selecter,
+	  [ label_selecter/7
+	  ]).
+
+/* This module provides the meta-predicate label_selecter/7, which implements the selecter/5 predicate of the
+ * label matching partitioners. So far only selection type `all` is handled for non-empty input.
+ */
+
+:- meta_predicate label_selecter(+, 3, +, -, -, -, +).
+
+label_selecter(  _, _, [],  [],  [],  [], _).
+label_selecter(all, Matcher, [Head|Tail], Sel, Dis, [], Options) :-
+	(   call(Matcher, Head, Match, Options)
+	->  Sel = [Match|TSel],
+	    Dis = TDis
+	;   Sel = TSel,
+	    Dis = [Head|TDis]
+	),
+	label_selecter(all, Matcher, Tail, TSel, TDis, [], Options).
diff --git a/rdf/tool/ag_modules.ttl b/rdf/tool/ag_modules.ttl
index 3bae959..e9a2d36 100644
--- a/rdf/tool/ag_modules.ttl
+++ b/rdf/tool/ag_modules.ttl
@@ -3,6 +3,18 @@
 @prefix skos: <http://www.w3.org/2004/02/skos/core#> .
 @prefix amalgame: <http://purl.org/vocabularies/amalgame#> .
 
+amalgame:ExactLabelMatcher
+    rdfs:label "generate/label (exact)"@en ;
+    skos:definition "Generate new candidates based on exact matching labels of source and target concepts"@en ;
+    rdfs:subClassOf amalgame:CandidateGenerator .
+
+amalgame:ExactLabelSelecter
+    rdfs:label "partition/label (exact)"@en ;
+    skos:definition "Partition existing candidates based on exact matching labels of source and target concepts"@en ;
+    rdfs:subClassOf amalgame:MappingPartitioner .
+
+####################
+
 amalgame:EvaluationProcess
     rdfs:label "Manual evaluation"@en ;
     skos:definition "A process class representing manual evaluation processes "@en ;
@@ -18,11 +30,6 @@ amalgame:SelectPreLoadedSelecter
     skos:definition "Select mappings with corresponding mappings in the preloaded mapping, discard others with the same source/target."@en ;
     rdfs:subClassOf amalgame:MappingPartitioner .
 
-amalgame:ExactLabelMatcher
-    rdfs:label "string/label (exact)"@en ;
-    skos:definition "A basic label matcher based on exact matching labels of source and target concepts"@en ;
-    rdfs:subClassOf amalgame:Matcher .
-
 amalgame:CompoundMatcher
     rdfs:label "string/label (compound)"@en ;
     skos:definition "A label matcher matching after compound splitting the label(s) of the source concepts"@en ;
diff --git a/rdf/tool/amalgame.ttl b/rdf/tool/amalgame.ttl
index 8a2bb2d..5e4bcdf 100644
--- a/rdf/tool/amalgame.ttl
+++ b/rdf/tool/amalgame.ttl
@@ -11,7 +11,7 @@
 amalgame:AlignmentStrategy
     a prov:Plan ;
     rdfs:label "Alignment strategy"@en ;
-    rdfs:comment "RDF representation of an alignment strategy that can be loaded and executed by Amalgame"@en.
+    rdfs:comment "RDF representation of an alignment strategy that can be loaded into, and executed by, Amalgame"@en.
 
 amalgame:Entity
     rdfs:label "Entity"@en ;
@@ -23,18 +23,44 @@ amalgame:Process
     rdfs:comment "Top class for all amalgame processes"@en ;
     rdfs:subClassOf prov:Activity .
 
+amalgame:Partitioner
+    rdfs:label "Partitioner"@en ;
+    rdfs:comment "A process that partitions an existing Entity into subsets"@en ;
+    rdfs:subClassOf amalgame:Process .
+
+amalgame:CandidateGenerator
+    rdfs:label "Candidate generator"@en ;
+    rdfs:comment "A process that generates a mapping with candidate correspondences."@en ;
+    rdfs:subClassOf amalgame:Matcher . # fix me, should become amalgame:Process .
+
+amalgame:MappingPartitioner
+    rdfs:label "Mapping partitioner"@en ;
+    skos:definition "Component that creates subsets from an existing mapping"@en ;
+    rdfs:subClassOf amalgame:Partitioner .
+
+amalgame:VocabPartitioner
+    rdfs:label "Vocabulary partitioner"@en ;
+    skos:definition "Component that creates subsets from an existing concept scheme"@en ;
+    rdfs:subClassOf amalgame:Partitioner .
+
+amalgame:VirtualVocabPartitioner 
+    rdfs:label "Virtual vocabulary partitioner"@en ;
+    skos:definition "Component that creates virtual subsets from an existing concept scheme"@en ;
+    skos:note "A virtual scheme is defined intentionally, not materialized in the store. Amalgame can iterate over all its 'inScheme' members and test whether a concept is inScheme or not."@en ;
+    rdfs:subClassOf amalgame:VocabPartitioner .
+
+###############################
+
+amalgame:Matcher
+    rdfs:label "Matcher (deprecated)"@en ;
+    skos:definition "Component that creates a mapping given a source and target vocabulary"@en ;
+    rdfs:subClassOf amalgame:Process .
+
 amalgame:parameters
     a rdf:Property ;
     rdfs:domain amalgame:Process ;
     rdfs:range rdfs:Literal .
 
-amalgame:Matcher
-    rdfs:label "Matcher"@en ;
-    skos:definition "Component that creates a mapping given a source and target vocabulary"@en ;
-    rdfs:subClassOf amalgame:Process .
-amalgame:Partitioner
-    rdfs:label "Partitioner"@en ;
-    rdfs:subClassOf amalgame:Process .
 amalgame:MultiInputComponent
     rdfs:label "Multi-input operator"@en ;
     skos:definition "Component with multiple first-class inputs"@en ;
@@ -43,16 +69,6 @@ amalgame:SetOperator
     rdfs:label "Set operator"@en ;
     skos:definition "Component that creates a mapping by combining the correspondences from a number of mappings"@en ;
     rdfs:subClassOf amalgame:MultiInputComponent .
-amalgame:MappingPartitioner
-    rdfs:label "Mapping Partitioner"@en ;
-    skos:definition "Component that creates subsets from an existing mapping"@en ;
-    rdfs:subClassOf amalgame:Partitioner .
-amalgame:VocabPartitioner
-    rdfs:label "Vocabulary Partitioner"@en ;
-    skos:definition "Component that creates subsets from an existing concept scheme"@en ;
-    rdfs:subClassOf amalgame:Partitioner .
-amalgame:VirtualVocabPartitioner 
-    rdfs:subClassOf amalgame:VocabPartitioner .
 amalgame:Merger
     rdfs:label "merger"@en ;
     skos:definition "Component that creates a mapping by mergin the correspondences from a number of mappings"@en ;