amalgame/commit

ADDED: isub partitioner, refactorized exact/isub modules

author Jacco van Ossenbruggen
Sat Sep 6 16:53:17 2014 +0200
committer Jacco van Ossenbruggen
Sat Sep 6 16:53:17 2014 +0200
commit 3940c0f502c25dd9eab9f5a80d6531cf648f6c7a
tree 79a9654bb57190ed9dd73b7ad16bbbab2fc71146
parent 7d710a9010a2aef6ccd0460ec5fc19419cb3c2bd
Diff style: patch stat
diff --git a/config-available/ag_modules.pl b/config-available/ag_modules.pl
index 06a01e7..7558085 100644
--- a/config-available/ag_modules.pl
+++ b/config-available/ag_modules.pl
@@ -4,11 +4,15 @@
 :- use_module(library(ag_modules/ancestor_generator)).
 :- use_module(library(ag_modules/descendent_generator)).
 :- use_module(library(ag_modules/exact_label_generator)).
+:- use_module(library(ag_modules/isub_generator)).
+:- use_module(library(ag_modules/related_generator)).
 
 % Mapping producing partitioners:
 :- use_module(library(ag_modules/ancestor_selecter)).
 :- use_module(library(ag_modules/descendent_selecter)).
 :- use_module(library(ag_modules/exact_label_selecter)).
+:- use_module(library(ag_modules/isub_selecter)).
+:- use_module(library(ag_modules/related_selecter)).
 
 % Vocabulary filters/selecters
 :- use_module(library(ag_modules/voc_exclude)).
@@ -19,8 +23,6 @@
 % Modules that can be used as matchers
 :- use_module(library(ag_modules/compound_match)).
 :- use_module(library(ag_modules/snowball_match)).
-:- use_module(library(ag_modules/isub_match)).
-:- use_module(library(ag_modules/related_match)).
 :- use_module(library(ag_modules/preloaded_mapping)).
 
 % Alignment filters/selecters
@@ -31,7 +33,6 @@
 :- use_module(library(ag_modules/most_methods)).
 :- use_module(library(ag_modules/most_labels)).
 :- use_module(library(ag_modules/most_generic)).
-:- use_module(library(ag_modules/related_selecter)).
 :- use_module(library(ag_modules/sibling_selecter)).
 :- use_module(library(ag_modules/preloaded_selecter)).
 
diff --git a/lib/ag_modules/exact_label_selecter.pl b/lib/ag_modules/exact_label_selecter.pl
index 7158fdb..22a323d 100644
--- a/lib/ag_modules/exact_label_selecter.pl
+++ b/lib/ag_modules/exact_label_selecter.pl
@@ -5,8 +5,6 @@
 :- public selecter/5.
 :- public parameter/4.
 
-:- use_module(library(sort)).
-:- use_module(library(amalgame/map)).
 :- use_module(label_selecter).
 :- use_module(exact_label_match).
 :- use_module(string_match_util).
@@ -38,12 +36,4 @@ parameter(match_qualified_only, boolean, false,
 amalgame_module(amalgame:'ExactLabelSelecter').
 
 selecter(In, Sel, Dis, Und, Options) :-
-	option(type(SourceOrTarget), Options, all),
-	(   SourceOrTarget \= source
-	->  label_selecter(SourceOrTarget, exact_label_match, In, Sel, Dis, Und, Options)
-	;   predsort(ag_map:compare_align(target), In, InT),
-	    label_selecter(SourceOrTarget, exact_label_match, InT, Sel0, Dis0, Und0, Options),
-	    predsort(ag_map:compare_align(source), Sel0,  Sel),
-	    predsort(ag_map:compare_align(source), Dis0,  Dis),
-	    predsort(ag_map:compare_align(source), Und0,  Und)
-	).
+	label_selecter(exact_label_match, In, Sel, Dis, Und, Options).
diff --git a/lib/ag_modules/isub_generator.pl b/lib/ag_modules/isub_generator.pl
new file mode 100644
index 0000000..67dddba
--- /dev/null
+++ b/lib/ag_modules/isub_generator.pl
@@ -0,0 +1,45 @@
+:- module(isub_generator,
+	  []).
+
+:- use_module(library(amalgame/vocabulary)).
+:- use_module(isub_match).
+:- use_module(string_match_util).
+
+:- public matcher/4.
+:- public parameter/4.
+:- public amalgame_module/1.
+
+amalgame_module(amalgame:'IsubMatcher').
+
+parameter(sourcelabel, oneof(LabelProps), Default,
+	  '(Super)Property to get label of the source by') :-
+	rdf_equal(Default, rdfs:label),
+	label_list(LabelProps).
+parameter(targetlabel, oneof(LabelProps), Default,
+	  '(Super)Property to get the label of the target by') :-
+	rdf_equal(Default, rdfs:label),
+	label_list(LabelProps).
+parameter(threshold, float, 0.7,
+	  'threshold edit distance').
+parameter(language, oneof(['any'|L]), 'any', 'Language of source label') :-
+	strategy_languages(_,L).
+parameter(matchacross_lang,
+	  boolean, true,
+	  'Allow labels from different language to be matched').
+parameter(normalize,
+	  boolean, false,
+	  '(Case) normalize strings as described in the isub article').
+
+%%      matcher(+Source, +Target, -Mappings, +Options)
+%
+%       Mappings is a list of matches between instances of Source and
+%       Target.
+
+matcher(Source, Target, Mappings, Options) :-
+        findall(M, align(Source, Target, M, Options), Mappings0),
+	sort(Mappings0, Mappings).
+
+align(Source, Target, Match, Options) :-
+        vocab_member(S, Source),
+        vocab_member(T, Target),
+        isub_match(align(S,T,[]), Match, Options).
diff --git a/lib/ag_modules/isub_match.pl b/lib/ag_modules/isub_match.pl
index 6c8a33c..bb0dae6 100644
--- a/lib/ag_modules/isub_match.pl
+++ b/lib/ag_modules/isub_match.pl
@@ -1,72 +1,12 @@
 :- module(isub_match,
-	  []).
+	  [isub_match/3]).
 
+:- use_module(library(option)).
 :- use_module(library(semweb/rdf_db)).
 :- use_module(library(semweb/rdf_label)).
 :- use_module(library(isub)).
-:- use_module(library(amalgame/vocabulary)).
-:- use_module(string_match_util).
 
-:- public filter/3.
-:- public matcher/4.
-:- public parameter/4.
-:- public amalgame_module/1.
-
-amalgame_module(amalgame:'IsubMatcher').
-amalgame_module(amalgame:'IsubFilter').
-
-parameter(sourcelabel, oneof(LabelProps), Default,
-	  '(Super)Property to get label of the source by') :-
-	rdf_equal(Default, rdfs:label),
-	label_list(LabelProps).
-parameter(targetlabel, oneof(LabelProps), Default,
-	  '(Super)Property to get the label of the target by') :-
-	rdf_equal(Default, rdfs:label),
-	label_list(LabelProps).
-parameter(threshold, float, 0.7,
-	  'threshold edit distance').
-parameter(language, oneof(['any'|L]), 'any', 'Language of source label') :-
-	strategy_languages(_,L).
-parameter(matchacross_lang,
-	  boolean, true,
-	  'Allow labels from different language to be matched').
-parameter(normalize,
-	  boolean, false,
-	  '(Case) normalize strings as described in the isub article').
-
-%%      filter(+MappingsIn, -MappingsOut, +Options)
-%
-%       Filter mappings based on exact matching of labels.
-
-filter([], [], _).
-filter([align(S,T,P)|Cs], [C|Mappings], Options) :-
-        (   T = scheme(_)
-	->  vocab_member(T2, T),
-	    match(align(S,T2,P), C, Options)
-        ;   match(align(S,T,P), C, Options)
-        ),
-        !,
-        filter(Cs, Mappings, Options).
-filter([_|Cs], Mappings, Options) :-
-        filter(Cs, Mappings, Options).
-
-
-%%      matcher(+Source, +Target, -Mappings, +Options)
-%
-%       Mappings is a list of matches between instances of Source and
-%       Target.
-
-matcher(Source, Target, Mappings, Options) :-
-        findall(M, align(Source, Target, M, Options), Mappings0),
-	sort(Mappings0, Mappings).
-
-align(Source, Target, Match, Options) :-
-        vocab_member(S, Source),
-        vocab_member(T, Target),
-        match(align(S,T,[]), Match, Options).
-
-
-match(align(Source, Target, Prov0), align(Source, Target, [Prov|Prov0]), Options) :-
+isub_match(align(Source, Target, Prov0), align(Source, Target, [Prov|Prov0]), Options) :-
 	rdf_equal(skos:definition, DefaultProp),
 	option(threshold(Threshold), Options, 0.0),
 	option(sourcelabel(MatchProp1), Options, DefaultProp),
diff --git a/lib/ag_modules/isub_selecter.pl b/lib/ag_modules/isub_selecter.pl
new file mode 100644
index 0000000..18c47e7
--- /dev/null
+++ b/lib/ag_modules/isub_selecter.pl
@@ -0,0 +1,38 @@
+:- module(isub_selecter,
+	  []).
+
+:- public amalgame_module/1.
+:- public selecter/5.
+:- public parameter/4.
+
+:- use_module(label_selecter).
+:- use_module(string_match_util).
+:- use_module(isub_match).
+
+amalgame_module(amalgame:'IsubSelecter').
+
+parameter(type,
+	  oneof([source,target, all]), all,
+	 'Select all exact label matches or pick best source/target to disambiguate').
+
+parameter(sourcelabel, oneof(LabelProps), Default,
+	  '(Super)Property to get label of the source by') :-
+	rdf_equal(Default, rdfs:label),
+	label_list(LabelProps).
+parameter(targetlabel, oneof(LabelProps), Default,
+	  '(Super)Property to get the label of the target by') :-
+	rdf_equal(Default, rdfs:label),
+	label_list(LabelProps).
+parameter(threshold, float, 0.7,
+	  'threshold edit distance').
+parameter(language, oneof(['any'|L]), 'any', 'Language of source label') :-
+	strategy_languages(_,L).
+parameter(matchacross_lang,
+	  boolean, true,
+	  'Allow labels from different language to be matched').
+parameter(normalize,
+	  boolean, false,
+	  '(Case) normalize strings as described in the isub article').
+
+selecter(In, Sel, Dis, Und, Options) :-
+	label_selecter(isub_match, In, Sel, Dis, Und, Options).
diff --git a/lib/ag_modules/label_selecter.pl b/lib/ag_modules/label_selecter.pl
index 1a732f3..422a35c 100644
--- a/lib/ag_modules/label_selecter.pl
+++ b/lib/ag_modules/label_selecter.pl
@@ -1,12 +1,23 @@
 :- module(label_selecter,
-	  [ label_selecter/7
+	  [ label_selecter/6
 	  ]).
 
 /* This module provides a meta predicate label_selecter/7, which implements the selecter/5 predicate of the
  * label matching partitioners.
  */
 
-:- meta_predicate label_selecter(+, 3, +, -, -, -, +).
+:- meta_predicate label_selecter(3, +, -, -, -, +).
+
+label_selecter(Matcher, In, Sel, Dis, Und, Options) :-
+	option(type(SourceOrTarget), Options, all),
+	(   SourceOrTarget \= source
+	->  label_selecter(SourceOrTarget, Matcher, In, Sel, Dis, Und, Options)
+	;   predsort(ag_map:compare_align(target), In, InT),
+	    label_selecter(SourceOrTarget, Matcher, InT, Sel0, Dis0, Und0, Options),
+	    predsort(ag_map:compare_align(source), Sel0,  Sel),
+	    predsort(ag_map:compare_align(source), Dis0,  Dis),
+	    predsort(ag_map:compare_align(source), Und0,  Und)
+	).
 
 label_selecter(  _, _, [],  [],  [],  [], _).
 label_selecter(all, Matcher, [Head|Tail], Sel, Dis, [], Options) :-
@@ -14,6 +25,9 @@ label_selecter(all, Matcher, [Head|Tail], Sel, Dis, [], Options) :-
 	->  Sel = [Match|TSel],
 	    Dis = TDis
 	;   Sel = TSel,
-	    Dis = [Head|TDis]
+	    Head = align(S,T,P),
+	    MisEv = [method(Matcher), score([result(discarded)])],
+	    MisMatch = align(S,T, [MisEv|P]),
+	    Dis = [MisMatch|TDis]
 	),
 	label_selecter(all, Matcher, Tail, TSel, TDis, [], Options).
diff --git a/rdf/tool/ag_modules.ttl b/rdf/tool/ag_modules.ttl
index 54b7f47..4c37366 100644
--- a/rdf/tool/ag_modules.ttl
+++ b/rdf/tool/ag_modules.ttl
@@ -22,6 +22,11 @@ amalgame:ExactLabelMatcher
     skos:definition "Generate new candidates based on exact matching labels of source and target concepts."@en ;
     rdfs:subClassOf amalgame:CandidateGenerator .
 
+amalgame:IsubMatcher
+    rdfs:label "generate/label/similarity"@en ;
+    skos:definition "Generate new candidates based on similar labels. The matcher is based on the 'isub' metric introduced in 'A string metric for ontology alignment' by Giorgos Stoilos, 2005."@en ;
+    rdfs:subClassOf amalgame:CandidateGenerator .
+
 amalgame:RelatedMatcher
     amalgame:need_secondary_inputs true ;
     rdfs:label "generate/structure/related"@en ;
@@ -47,6 +52,11 @@ amalgame:ExactLabelSelecter
     skos:definition "Select mappings with the most matching labels, discard others for the same source/target. If type=all, all candidates with matching labels are selected."@en ;
     rdfs:subClassOf amalgame:MappingPartitioner .
 
+amalgame:IsubSelecter
+    rdfs:label "partition/label/similarity"@en ;
+    skos:definition "Select mappings with the most similar labels, discard others for the same source/target.  If type=all, all candidates with sufficiently similar labels are selected. The matcher is based on the 'isub' metric introduced in 'A string metric for ontology alignment' by Giorgos Stoilos, 2005."@en ;
+    rdfs:subClassOf amalgame:MappingPartitioner .
+
 amalgame:RelatedSelecter
     amalgame:need_secondary_inputs true ;
     rdfs:label "partition/structure/related"@en ;
@@ -103,11 +113,6 @@ amalgame:SnowballMatcher
     skos:definition "A label matcher with similarity based on (snowball) stemming."@en ;
     rdfs:subClassOf amalgame:Matcher .
 
-amalgame:IsubMatcher
-    rdfs:label "string/similarity"@en ;
-    skos:definition "A string similarity matcher based on the 'isub' metric introduced in 'A string metric for ontology alignment' by Giorgos Stoilos, 2005."@en ;
-    rdfs:subClassOf amalgame:Matcher .
-
 amalgame:AritySelect
     rdfs:label "ambiguity/remove"@en ;
     skos:definition "Select correspondences with a unique source, target or both, discard others"@en;