amalgame/commit

IMPROVED: ancestor match/filter has parameters different from ancestor selector, split up

authorJacco van Ossenbruggen
Sat Aug 30 15:26:48 2014 +0200
committerJacco van Ossenbruggen
Sat Aug 30 15:26:48 2014 +0200
commitf391747976ab241e4ca82d322b22014abc340750
tree39fdbe712f8d5372b03d4f3be038ad8e80f19b35
parent85987569600349c7570dfd7fbe4ed81d80ccb3de
Diff style: patch stat
diff --git a/config-available/ag_modules.pl b/config-available/ag_modules.pl
index 8a140e5..755a122 100644
--- a/config-available/ag_modules.pl
+++ b/config-available/ag_modules.pl
@@ -1,14 +1,14 @@
 :- module(conf_ag_modules, []).
 
 
-% Modules that can be used as matchers and as alignment filters/selecters
+% Modules that can be used as matchers
 :- use_module(library(ag_modules/exact_label_match)).
 :- use_module(library(ag_modules/compound_match)).
 :- use_module(library(ag_modules/snowball_match)).
 :- use_module(library(ag_modules/isub_match)).
 :- use_module(library(ag_modules/ancestor_match)).
 :- use_module(library(ag_modules/descendent_match)).
-% :- use_module(library(ag_modules/related_match)). fix sec inputs first! see anc/dec matchers
+:- use_module(library(ag_modules/related_match)).
 :- use_module(library(ag_modules/preloaded_mapping)).
 
 % Alignment filters/selecters
@@ -19,6 +19,8 @@
 :- use_module(library(ag_modules/most_methods)).
 :- use_module(library(ag_modules/most_labels)).
 :- use_module(library(ag_modules/most_generic)).
+:- use_module(library(ag_modules/ancestor_selecter)).
+:- use_module(library(ag_modules/related_selecter)).
 :- use_module(library(ag_modules/sibling_selecter)).
 
 % Vocabulary filters/selecters
diff --git a/lib/ag_modules/ancestor.pl b/lib/ag_modules/ancestor.pl
new file mode 100644
index 0000000..52fe65c
--- /dev/null
+++ b/lib/ag_modules/ancestor.pl
@@ -0,0 +1,22 @@
+:- module(ancestor_match_util,
+	  [ ancestor_match/4
+	  ]).
+
+:- use_module(library(semweb/rdf_db)).
+:- use_module(library(skos/util)).
+
+ancestor_match(align(S, T, Prov0), BackgroundMatches, align(S, T, [Prov|Prov0]), Options) :-
+	option(steps(MaxSteps), Options),
+	ancestor(S, MaxSteps, AncS, R1, Steps1),
+	ancestor(T, MaxSteps, AncT, R2, Steps2),
+	get_assoc(AncS-AncT, BackgroundMatches, _),
+	Prov = [method(ancestor_match),
+		source(AncS),
+		target(AncT),
+		steps(Steps1/Steps2),
+		graph([R1,R2])
+	       ].
+
+ancestor(R, MaxSteps, Parent, rdf(R, Prop, Parent), Steps) :-
+	skos_descendant_of(Parent, R, MaxSteps, Steps),
+	rdf_equal(amalgame:descendant, Prop).
diff --git a/lib/ag_modules/ancestor_match.pl b/lib/ag_modules/ancestor_match.pl
index b6b6f1f..3377727 100644
--- a/lib/ag_modules/ancestor_match.pl
+++ b/lib/ag_modules/ancestor_match.pl
@@ -1,30 +1,26 @@
 :- module(ancestor_match,
 	  []).
 
-:- use_module(library(semweb/rdf_db)).
-:- use_module(library(skos/util)).
+:- use_module(library(assoc)).
+:- use_module(library(lists)).
+:- use_module(library(option)).
 :- use_module(library(amalgame/vocabulary)).
-:- use_module(library(amalgame/map)).
+:- use_module(ancestor).
 
 :- public amalgame_module/1.
 :- public filter/3.
 :- public matcher/4.
-:- public selecter/5.
 :- public parameter/4.
 
 amalgame_module(amalgame:'AncestorMatcher').
 amalgame_module(amalgame:'AncestorFilter').
-amalgame_module(amalgame:'AncestorSelecter').
 
 parameter(steps, integer, 1,
 	  'depth of search, defaults to 1, e.g. direct parents only').
-parameter(type,
-          oneof([source, target]), source,
-          'Discard other matches with the same source/target').
 
 %%	filter(+MappingsIn, -MappingsOut, +Options)
 %
-%	Filter mappings based on exact matching of labels.
+%	Filter mappings based on ancestor matches in snd_input.
 
 filter(In, Out, Options) :-
 	option(snd_input(SecList), Options),
@@ -33,20 +29,6 @@ filter(In, Out, Options) :-
 	ord_list_to_assoc(Deduped, BackgroundMatches),
 	filter_(In, BackgroundMatches, Out, Options).
 
-filter_([], _, [], _).
-filter_([align(S,T,P)|Cs], BackgroundMatches, [C|Mappings], Options) :-
-	(   T = scheme(_)
-	->  match(align(S,_,P), BackgroundMatches, C, Options),
-	    C=align(_,T2,_),
-	    vocab_member(T2, T)
-	;   match(align(S,T,P), BackgroundMatches, C, Options)
-	),
-	!,
-	filter_(Cs, BackgroundMatches, Mappings, Options).
-filter_([_|Cs], BackgroundMatches, Mappings, Options) :-
-	filter_(Cs, BackgroundMatches, Mappings, Options).
-
-
 %%	matcher(+Source, +Target, -Mappings, +Options)
 %
 %	Mappings is a list of matches between instances of Source and
@@ -60,72 +42,21 @@ matcher(Source, Target, Mappings, Options) :-
 	findall(M, align(Source, Target, BackgroundMatches, M, Options), Mappings0),
 	sort(Mappings0, Mappings).
 
+filter_([], _, [], _).
+filter_([align(S,T,P)|Cs], BackgroundMatches, [C|Mappings], Options) :-
+	(   T = scheme(_)
+	->  ancestor_match(align(S,_,P), BackgroundMatches, C, Options),
+	    C=align(_,T2,_),
+	    vocab_member(T2, T)
+	;   ancestor_match(align(S,T,P), BackgroundMatches, C, Options)
+	),
+	!,
+	filter_(Cs, BackgroundMatches, Mappings, Options).
+filter_([_|Cs], BackgroundMatches, Mappings, Options) :-
+	filter_(Cs, BackgroundMatches, Mappings, Options).
+
+
 align(Source, Target, BackgroundMatches, Match, Options) :-
 	vocab_member(S, Source),
 	vocab_member(T, Target),
-	match(align(S,T,[]), BackgroundMatches, Match, Options).
-
-
-match(align(S, T, Prov0), BackgroundMatches, align(S, T, [Prov|Prov0]), Options) :-
-	option(steps(MaxSteps), Options),
-	ancestor(S, MaxSteps, AncS, R1, Steps1),
-	ancestor(T, MaxSteps, AncT, R2, Steps2),
-	get_assoc(AncS-AncT, BackgroundMatches, _),
-	Prov = [method(ancestor_match),
-		source(AncS),
-		target(AncT),
-		steps(Steps1/Steps2),
-		graph([R1,R2])
-	       ].
-
-ancestor(R, MaxSteps, Parent, rdf(R, Prop, Parent), Steps) :-
-	skos_descendant_of(Parent, R, MaxSteps, Steps),
-	rdf_equal(amalgame:descendant, Prop).
-
-selecter(In, Sel, Dis, Und, Options) :-
-	option(snd_input(SecList), Options),
-	option(type(SourceOrTarget), Options, source),
-	findall(S-T-P, member(align(S,T,P), SecList), KeyValueList),
-	keysort(KeyValueList, Deduped),
-	ord_list_to_assoc(Deduped, BackgroundMatches),
-	(   SourceOrTarget = source
-	->  selecter_(SourceOrTarget, In, BackgroundMatches, Sel, Dis, Und, Options)
-	;   predsort(ag_map:compare_align(target), In, InT),
-	    selecter_(SourceOrTarget, InT, BackgroundMatches, Sel0, Dis0, Und0, Options),
-	    predsort(ag_map:compare_align(source), Sel0,  Sel),
-	    predsort(ag_map:compare_align(source), Dis0,  Dis),
-	    predsort(ag_map:compare_align(source), Und0,  Und)
-	).
-
-selecter_(_, [], _, [], [], [], _).
-selecter_(Type, [Head|Tail], BackgroundMatches, Sel, Dis, Und, Options) :-
-	Head = align(S,T,_),
-	(   Type == source
-	->  same_source(Tail, S, Same, Rest)
-	;   same_target(Tail, T, Same, Rest)
-	),
-	selecter_(Type, Rest, BackgroundMatches, TailSel, TailDis, TailUnd, Options),
-	Candidates = [Head|Same],
-	maplist(ancestor_count(BackgroundMatches, Options), Candidates, Counts0),
-	keysort(Counts0, Counts),
-	partition(=(0-_), Counts, Zero, NonZero),
-	(   NonZero \= []
-	->  pairs_values(Zero,   DisgardedSame),
-	    pairs_values(NonZero, SelectedSame),
-	    append([SelectedSame,  TailSel], Sel),
-	    append([DisgardedSame, TailDis], Dis),
-	    Und = TailUnd
-	;   Sel = TailSel,
-	    Dis = TailDis,
-	    append([Candidates, TailUnd], Und)
-	).
-
-ancestor_count(BackgroundMatches, Options, Corr, Count-Merged) :-
-	findall(Match,
-		match(Corr, BackgroundMatches, Match, Options),
-		Matches),
-	length(Matches, Count),
-	(   Count > 0
-	->  merge_provenance(Matches, [Merged])
-	;   Merged = Corr
-	).
+	ancestor_match(align(S,T,[]), BackgroundMatches, Match, Options).
diff --git a/lib/ag_modules/ancestor_selecter.pl b/lib/ag_modules/ancestor_selecter.pl
new file mode 100644
index 0000000..196d19f
--- /dev/null
+++ b/lib/ag_modules/ancestor_selecter.pl
@@ -0,0 +1,74 @@
+:- module(ancestor_selecter,
+	  []).
+
+:- use_module(library(apply)).
+:- use_module(library(assoc)).
+:- use_module(library(lists)).
+:- use_module(library(option)).
+:- use_module(library(pairs)).
+:- use_module(library(sort)).
+
+:- use_module(library(amalgame/map)).
+:- use_module(ancestor).
+
+:- public amalgame_module/1.
+:- public selecter/5.
+:- public parameter/4.
+
+amalgame_module(amalgame:'AncestorSelecter').
+
+parameter(steps, integer, 1,
+	  'depth of search, defaults to 1, e.g. direct parents only').
+parameter(type,
+          oneof([source, target]), source,
+          'Discard other matches with the same source/target').
+
+selecter(In, Sel, Dis, Und, Options) :-
+	option(snd_input(SecList), Options),
+	option(type(SourceOrTarget), Options, source),
+	findall(S-T-P, member(align(S,T,P), SecList), KeyValueList),
+	keysort(KeyValueList, Deduped),
+	ord_list_to_assoc(Deduped, BackgroundMatches),
+	(   SourceOrTarget = source
+	->  selecter_(SourceOrTarget, In, BackgroundMatches, Sel, Dis, Und, Options)
+	;   predsort(ag_map:compare_align(target), In, InT),
+	    selecter_(SourceOrTarget, InT, BackgroundMatches, Sel0, Dis0, Und0, Options),
+	    predsort(ag_map:compare_align(source), Sel0,  Sel),
+	    predsort(ag_map:compare_align(source), Dis0,  Dis),
+	    predsort(ag_map:compare_align(source), Und0,  Und)
+	).
+
+selecter_(_, [], _, [], [], [], _).
+selecter_(Type, [Head|Tail], BackgroundMatches, Sel, Dis, Und, Options) :-
+	Head = align(S,T,_),
+	(   Type == source
+	->  same_source(Tail, S, Same, Rest)
+	;   same_target(Tail, T, Same, Rest)
+	),
+	selecter_(Type, Rest, BackgroundMatches, TailSel, TailDis, TailUnd, Options),
+	Candidates = [Head|Same],
+	maplist(ancestor_count(BackgroundMatches, Options), Candidates, Counts0),
+	keysort(Counts0, Counts),
+	partition(zero_key, Counts, Zero, NonZero),
+	(   NonZero \= []
+	->  pairs_values(Zero,   DisgardedSame),
+	    pairs_values(NonZero, SelectedSame),
+	    append([SelectedSame,  TailSel], Sel),
+	    append([DisgardedSame, TailDis], Dis),
+	    Und = TailUnd
+	;   Sel = TailSel,
+	    Dis = TailDis,
+	    append([Candidates, TailUnd], Und)
+	).
+
+zero_key(0-_Value).
+
+ancestor_count(BackgroundMatches, Options, Corr, Count-Merged) :-
+	findall(Match,
+		ancestor_match(Corr, BackgroundMatches, Match, Options),
+		Matches),
+	length(Matches, Count),
+	(   Count > 0
+	->  merge_provenance(Matches, [Merged])
+	;   Merged = Corr
+	).
diff --git a/rdf/tool/ag_modules.ttl b/rdf/tool/ag_modules.ttl
index dad65c4..c1a8da2 100644
--- a/rdf/tool/ag_modules.ttl
+++ b/rdf/tool/ag_modules.ttl
@@ -63,6 +63,12 @@ amalgame:RelatedMatcher
     skos:definition "Matcher looking for 1 or more related concepts that have already been mapped."@en ;
     rdfs:subClassOf amalgame:Matcher .
 
+amalgame:RelatedSelecter
+    amalgame:need_secondary_inputs true ;
+    rdfs:label "structure/related"@en ;
+    skos:definition "Select mappings with already mapped related concepts, discard others with the same source/target."@en ;
+    rdfs:subClassOf amalgame:MappingSelecter .
+
 amalgame:AritySelect
     rdfs:label "ambiguity remover"@en ;
     skos:definition "Remove correspondences without a unique source, target or both"@en;