amalgame/commit

work in progress on return of related matcher

authorJacco van Ossenbruggen
Sat Aug 30 12:22:37 2014 +0200
committerJacco van Ossenbruggen
Sat Aug 30 12:22:37 2014 +0200
commit85987569600349c7570dfd7fbe4ed81d80ccb3de
treee5597462008eb8cfd0aed03791e64b609f70a476
parent376cdd662f03dbbafd20c38ada28be076ef61745
Diff style: patch stat
diff --git a/lib/ag_modules/related.pl b/lib/ag_modules/related.pl
new file mode 100644
index 0000000..4b5bae5
--- /dev/null
+++ b/lib/ag_modules/related.pl
@@ -0,0 +1,21 @@
+:- module(related_match_util,
+	  [ related_match/4]).
+
+:- use_module(library(semweb/rdf_db)).
+:- use_module(library(skos/util)).
+
+related_match(align(S, T, Prov0), BackgroundMatches, align(S, T, [Prov|Prov0]), Options) :-
+	option(steps(MaxSteps), Options),
+	related(S, MaxSteps, AncS, R1, Steps1),
+	related(T, MaxSteps, AncT, R2, Steps2),
+	get_assoc(AncS-AncT, BackgroundMatches, _),
+	Prov = [method(related_match),
+		source(AncS),
+		target(AncT),
+		steps(Steps1/Steps2),
+		graph([R1,R2])
+	       ].
+
+related(R, MaxSteps, Related, rdf_reachable(R, Prop, Related), Steps) :-
+	skos_related_to(R, Related, MaxSteps, Steps),
+	rdf_equal(skos:related, Prop).
diff --git a/lib/ag_modules/related_match.pl b/lib/ag_modules/related_match.pl
index 4d17975..259fd84 100644
--- a/lib/ag_modules/related_match.pl
+++ b/lib/ag_modules/related_match.pl
@@ -1,9 +1,10 @@
 :- module(related_match,
 	  []).
 
-:- use_module(library(semweb/rdf_db)).
+:- use_module(library(assoc)).
+:- use_module(library(option)).
 :- use_module(library(amalgame/vocabulary)).
-:- use_module(library(amalgame/map)).
+:- use_module(related).
 
 :- public amalgame_module/1.
 :- public filter/3.
@@ -13,8 +14,6 @@
 amalgame_module(amalgame:'RelatedMatcher').
 amalgame_module(amalgame:'RelatedFilter').
 
-parameter(graph, atom, 'DEFAULT_GRAPH',
-	  'named graph to query for related concepts, defaults to full repository').
 parameter(steps, integer, 1,
 	  'depth of search, defaults to 1, e.g. direct related concepts only').
 
@@ -22,19 +21,12 @@ parameter(steps, integer, 1,
 %
 %	Filter mappings based on exact matching of labels.
 
-filter([], [], _).
-filter([align(S,T,P)|Cs], [C|Mappings], Options) :-
-	(   T = scheme(_)
-	->  match(align(S,_,P), C, Options),
-	    C=align(_,T2,_),
-	    vocab_member(T2, T)
-	;   match(align(S,T,P), C, Options)
-	),
-	!,
-	filter(Cs, Mappings, Options).
-filter([_|Cs], Mappings, Options) :-
-	filter(Cs, Mappings, Options).
-
+filter(In, Out, Options) :-
+	option(snd_input(SecList), Options),
+	findall(S-T-P, member(align(S,T,P), SecList), KeyValueList),
+	keysort(KeyValueList, Deduped),
+	ord_list_to_assoc(Deduped, BackgroundMatches),
+	filter_(In, BackgroundMatches, Out, Options).
 
 %%	matcher(+Source, +Target, -Mappings, +Options)
 %
@@ -42,37 +34,28 @@ filter([_|Cs], Mappings, Options) :-
 %	Target.
 
 matcher(Source, Target, Mappings, Options) :-
-	findall(M, align(Source, Target, M, Options), Mappings0),
+	option(snd_input(SecList), Options),
+	findall(S-T-P, member(align(S,T,P), SecList), KeyValueList),
+	keysort(KeyValueList, Deduped),
+	ord_list_to_assoc(Deduped, BackgroundMatches),
+	findall(M, align(Source, Target, BackgroundMatches, M, Options), Mappings0),
 	sort(Mappings0, Mappings).
 
-align(Source, Target, Match, Options) :-
-	vocab_member(S, Source),
-	vocab_member(T, Target),
-	match(align(S,T,[]), Match, Options).
-
-
-match(align(S, T, Prov0), align(S, T, [Prov|Prov0]), Options) :-
-	(   option(graph(Graph), Options, 'DEFAULT_GRAPH'), Graph \== 'DEFAULT_GRAPH'
-	->  true
-	;   Graph = _
+filter_([], _, [], _).
+filter_([align(S,T,P)|Cs], BackgroundMatches, [C|Mappings], Options) :-
+	(   S = 'http://zbw.eu/stw/descriptor/18781-5' -> gtrace; true),
+	(   T = scheme(_)
+	->  related_match(align(S,_,P), BackgroundMatches, C, Options),
+	    C=align(_,T2,_),
+	    vocab_member(T2, T)
+	;   related_match(align(S,T,P), BackgroundMatches, C, Options)
 	),
-	option(steps(MaxSteps), Options),
-
-	related(S, MaxSteps, AncS, R1, _Steps1),
-	related(T, MaxSteps, AncT, R2, _Steps2),
-	has_correspondence(align(AncS,AncT,P), Graph),
-	member(O,P),
-	memberchk(relation(R), O),
-	Prov = [method(related_match),
-		graph([R1,R2,rdf(AncS,R,AncT)])
-	       ].
+	!,
+	filter_(Cs, BackgroundMatches, Mappings, Options).
+filter_([_|Cs], BackgroundMatches, Mappings, Options) :-
+	filter_(Cs, BackgroundMatches, Mappings, Options).
 
-related(R, MaxSteps, Parent, rdf_reachable(R, Prop, Parent), Steps) :-
-	rdf_equal(skos:related, Prop),
-	rdf_reachable(R, Prop, Parent, MaxSteps, Steps),
-	\+ R == Parent.
-related(R, MaxSteps, Parent, rdf_reachable(Parent, Prop, R), Steps) :-
-	rdf_equal(skos:related, Prop),
-	rdf_reachable(Parent, Prop, R, MaxSteps, Steps),
-	\+ R == Parent,
-	\+ rdf_reachable(R, skos:related, Parent).
+align(Source, Target, BackgroundMatches, Match, Options) :-
+	vocab_member(S, Source),
+	vocab_member(T, Target),
+	related_match(align(S,T,[]), BackgroundMatches, Match, Options).
diff --git a/lib/ag_modules/related_selecter.pl b/lib/ag_modules/related_selecter.pl
new file mode 100644
index 0000000..884c962
--- /dev/null
+++ b/lib/ag_modules/related_selecter.pl
@@ -0,0 +1,73 @@
+:- module(related_selecter,
+	  []).
+
+:- use_module(library(apply)).
+:- use_module(library(assoc)).
+:- use_module(library(lists)).
+:- use_module(library(option)).
+:- use_module(library(pairs)).
+:- use_module(library(sort)).
+:- use_module(library(amalgame/map)).
+:- use_module(related).
+
+:- public amalgame_module/1.
+:- public selecter/5.
+:- public parameter/4.
+
+amalgame_module(amalgame:'RelatedSelecter').
+
+parameter(steps, integer, 1,
+	  'depth of search, defaults to 1, e.g. direct related concepts only').
+parameter(type,
+          oneof([source, target]), source,
+          'Discard other matches with the same source/target').
+
+selecter(In, Sel, Dis, Und, Options) :-
+	option(snd_input(SecList), Options),
+	option(type(SourceOrTarget), Options, source),
+	findall(S-T-P, member(align(S,T,P), SecList), KeyValueList),
+	keysort(KeyValueList, Deduped),
+	ord_list_to_assoc(Deduped, BackgroundMatches),
+	(   SourceOrTarget = source
+	->  selecter_(SourceOrTarget, In, BackgroundMatches, Sel, Dis, Und, Options)
+	;   predsort(ag_map:compare_align(target), In, InT),
+	    selecter_(SourceOrTarget, InT, BackgroundMatches, Sel0, Dis0, Und0, Options),
+	    predsort(ag_map:compare_align(source), Sel0,  Sel),
+	    predsort(ag_map:compare_align(source), Dis0,  Dis),
+	    predsort(ag_map:compare_align(source), Und0,  Und)
+	).
+
+selecter_(_, [], _, [], [], [], _).
+selecter_(Type, [Head|Tail], BackgroundMatches, Sel, Dis, Und, Options) :-
+	Head = align(S,T,_),
+	(   Type == source
+	->  same_source(Tail, S, Same, Rest)
+	;   same_target(Tail, T, Same, Rest)
+	),
+	selecter_(Type, Rest, BackgroundMatches, TailSel, TailDis, TailUnd, Options),
+	Candidates = [Head|Same],
+	maplist(related_count(BackgroundMatches, Options), Candidates, Counts0),
+	keysort(Counts0, Counts),
+	partition(zero_key, Counts, Zero, NonZero),
+	(   NonZero \= []
+	->  pairs_values(Zero,   DisgardedSame),
+	    pairs_values(NonZero, SelectedSame),
+	    append([SelectedSame,  TailSel], Sel),
+	    append([DisgardedSame, TailDis], Dis),
+	    Und = TailUnd
+	;   Sel = TailSel,
+	    Dis = TailDis,
+	    append([Candidates, TailUnd], Und)
+	).
+
+zero_key(0-_Value).
+
+related_count(BackgroundMatches, Options, Corr, Count-Merged) :-
+	findall(Match,
+		related_match(Corr, BackgroundMatches, Match, Options),
+		Matches),
+	length(Matches, Count),
+	(   Count > 0
+	->  merge_provenance(Matches, [Merged])
+	;   Merged = Corr
+	).