amalgame/commit

start with provenance process execution

authorMichiel Hildebrand
Fri Feb 11 20:09:45 2011 +0100
committerMichiel Hildebrand
Fri Feb 11 20:09:45 2011 +0100
commit25326c90b3db8de4b10bd0df258d5c8dee8b3987
tree90604e7c185592c1ae4052d40c009f80e2dbdc0d
parent2f7f122888ceafcbf1af8f206c8cad942f4fd7fe
Diff style: patch stat
diff --git a/lib/amalgame/alignment_graph.pl b/lib/amalgame/alignment_graph.pl
index 853f64b..c0b67c6 100644
--- a/lib/amalgame/alignment_graph.pl
+++ b/lib/amalgame/alignment_graph.pl
@@ -1,19 +1,98 @@
 :- module(alignment_graph,
-	  [ graph_member/2,
+	  [ e/2,
+	    flush_map_cache/0,
+	    flush_map_cache/1,
+	    graph_member/2,
 	    merge_graphs/2,
 	    merge_provenance/2,
 	    materialize_alignment_graph/2
 	  ]).
 
 :- use_module(library(semweb/rdf_db)).
+:- use_module(library(semweb/rdfs)).
 :- use_module(library(semweb/rdf_persistency)).
 :- use_module(library(amalgame/edoal)).
+:- use_module(library(amalgame/map)).
+
+% components
+:- use_module(library(amalgame/matchers/snowball_match)).
+:- use_module(library(amalgame/select/select1_1)).
+
+:- dynamic
+	map_cache/2.
+
+flush_map_cache :-			% forget every cached mapping
+	retractall(map_cache(_,_)).
+flush_map_cache(MappingId) :-		% forget what is cached for MappingId
+	retractall(map_cache(MappingId,_)).
+
+%%	e(+Id, -Mapping): compute Mapping by running the process that generated Id.
+e(Id, Mapping) :-
+	rdf_has(Id, opmv:wasGeneratedBy, Process),	% process recorded as generator of Id
+	rdf(Process, rdf:type, Type),			% its type selects the do_process/4 clause
+	do_process(Process, Type, Id, Mapping).
+
+do_process(Process, Type, Id, Mapping) :-		% Match: call Module:matcher/4, cache result
+	rdfs_subclass_of(Type, amalgame:'Match'),
+	!,
+ 	rdf(Process, amalgame:source, Source),
+	rdf(Process, amalgame:target, Target),
+	process_options(Process, Options),
+ 	resource_to_term(Type, Module),			% module is named after the class
+	debug(align, 'running ~w matcher', [Module]),
+ 	call(Module:matcher, Source, Target, Mapping, Options),
+	assert(map_cache(Id, Mapping)).			% remember the result under Id
+
+do_process(Process, Type, Id, Mapping) :-		% Select: call Module:selecter/5, pick one part
+	rdfs_subclass_of(Type, amalgame:'Select'),
+	!,
+ 	rdf(Process, amalgame:source, Source),
+	process_options(Process, Options),
+ 	resource_to_term(Type, Module),
+	findall(A, graph_member(A, Source), Graph0),
+	sort(Graph0, Graph),				% ordered and free of duplicates
+	debug(align, 'running ~w select', [Module]),
+ 	call(Module:selecter, Graph, Selected, Discarded, Undecided, Options),
+	rdf_has(Id, opmv:used, _, P0),			% P0: the property actually stored on Id
+	resource_to_term(P0, P),			% must give selected, discarded or undecided
+	select_mapping(P, Selected, Discarded, Undecided, Mapping). % NOTE(review): not added to map_cache, unlike the Match clause - confirm
+
+select_mapping(selected, Selected, _, _, Selected).	% select_mapping(+Kind, +Sel, +Dis, +Und, -Mapping):
+select_mapping(discarded, _, Discarded, _, Discarded).	% Mapping is the one list named by Kind;
+select_mapping(undecided, _, _, Undecided, Undecided).	% fails for any other Kind.
+
+
+%%	process_options(+Process, -Options)
+%
+%	Options derive from the amalgame:parameters of Process, [] if it has none.
+
+process_options(Process, Options) :-
+	(   rdf(Process, amalgame:parameters, ParamString)
+	->  param_string_to_options(ParamString, Options)
+	;   Options = []
+	).
+
+param_string_to_options(S,S).	% identity: Options is the parameter value itself, unparsed
+
+
+
+%%	resource_to_term(+RDFResource, -PrologTerm)
+%
+%	Map an Amalgame RDF class to a Prolog name: its lower-cased local name.
+
+resource_to_term(Resource, Term) :-
+	rdf_global_id(_:Local, Resource),	% drop the namespace prefix
+	downcase_atom(Local, Term).
+
 
 %%	graph_member(?Element, ?Graph)
 %
 %	Enumerate elements of Graph, where Graph is a list, a skos
 %	scheme URI or a named graph URI.
 
+graph_member(_, Var) :-		% unbound graph: succeed once without binding anything
+	var(Var),
+	!.
 graph_member(E, List) :-
 	is_list(List),
 	!,
@@ -27,6 +106,28 @@ graph_member(E, type(Class)) :-
 graph_member(E, graph(Graph)) :-
 	!,
 	rdf_has(E, rdf:type, _, Graph).
+graph_member(E, Scheme) :-				% SKOS concept scheme: resources in the scheme
+	rdfs_individual_of(Scheme, skos:'ConceptScheme'),
+	!,
+	rdf(E, skos:inScheme, Scheme).
+graph_member(E, Class) :-				% RDFS class: its individuals
+	rdfs_individual_of(Class, rdfs:'Class'),
+	!,
+	rdfs_individual_of(E, Class).
+graph_member(align(S,T,P), MappingId) :-		% amalgame mapping: its align/3 terms
+	rdfs_individual_of(MappingId, amalgame:'Mapping'),
+	(   has_map(_,_,MappingId)			% has_map/3 already knows this mapping
+	->  has_map([S-T], P, MappingId)
+	;   map_cache(MappingId, Mapping)		% cached: commit, so that backtracking
+	->  debug(align, 'using cache for ~w', [MappingId]), % never re-runs e/2 below
+	    member(align(S,T,P), Mapping)
+	;   e(MappingId, Mapping),			% not cached: run the generating process
+	    member(align(S,T,P), Mapping)
+	).
+
+
+
+
 
 
 %%	merge_graphs(+ListOfGraphs, -Merged)
diff --git a/lib/amalgame/candidate.pl b/lib/amalgame/candidate.pl
new file mode 100644
index 0000000..b133528
--- /dev/null
+++ b/lib/amalgame/candidate.pl
@@ -0,0 +1,19 @@
+:- module(candidate,
+	  [ prefix_candidate/4
+	  ]).
+
+:- use_module(library(amalgame/alignment_graph)).
+:- use_module(library(semweb/rdf_label)).
+:- use_module(library(semweb/rdf_db)).
+
+prefix_candidate(Source, Target, align(S, T, []), Options) :-	% pairs whose labels share a prefix
+	rdf_equal(rdfs:label, DefaultProp),			% label properties default to rdfs:label
+ 	option(sourcelabel(MatchProp1), Options, DefaultProp),
+	option(targetlabel(MatchProp2), Options, DefaultProp),
+	option(prefixLength(PrefixLength), Options, 4),
+	graph_member(S, Source),
+ 	rdf_has(S, MatchProp1, Lit),
+	literal_text(Lit, Label),
+	( sub_atom(Label, 0, PrefixLength, _, Prefix) -> true ; Prefix = Label, Label \== '' ), % short label: use it whole
+	rdf_has(T, MatchProp2, literal(prefix(Prefix), _)),	% targets with a label starting with Prefix
+	graph_member(T, Target).
diff --git a/lib/amalgame/gtaa_cornetto.pl b/lib/amalgame/gtaa_cornetto.pl
index 375ada3..068f4a1 100644
--- a/lib/amalgame/gtaa_cornetto.pl
+++ b/lib/amalgame/gtaa_cornetto.pl
@@ -31,6 +31,15 @@
 :- use_module(library(semweb/rdfs)).
 
 
+
+
+
+
+
+
+
+
+
 align :-
 	debug(align),
 	GTAA_Subjects = scheme('http://data.beeldengeluid.nl/gtaa/Onderwerpen'),
diff --git a/lib/amalgame/matchers/snowball_match.pl b/lib/amalgame/matchers/snowball_match.pl
index b642748..61bd947 100644
--- a/lib/amalgame/matchers/snowball_match.pl
+++ b/lib/amalgame/matchers/snowball_match.pl
@@ -5,14 +5,25 @@
 :- use_module(library(semweb/rdf_litindex)).
 :- use_module(library(snowball)).
 :- use_module(library(lit_distance)).
+:- use_module(library(amalgame/alignment_graph)).
+:- use_module(library(amalgame/candidate)).
 
-:- public match/3.
-:- multifile amalgame:component/2.
 
-amalgame:component(match, snowball(align(uri, uri, provenance), align(uri,uri,provenance),
-					      [sourcelabel(uri, [default(rdfs:label)]),
-					       targetlabel(uri, [default(rdfs:label)])
-					      ])).
+:- public matcher/4.
+
+matcher(Source, Target, Mappings, Options) :-	% Mappings: all align/4 solutions, in order
+	findall(Match, align(Source, Target, Match, Options), Mappings).
+
+%%	align(+Source, +Target, -Match, +Options) is nondet.
+%	prefix(0) in Options: try all of Source; else use prefix_candidate/4.
+align(Source, Target, Match, Options) :-
+	(   option(prefix(0), Options)
+	->  graph_member(Subject, Source),
+	    match(align(Subject,Object,[]), Match, Options),
+	    graph_member(Object, Target)
+	;   prefix_candidate(Source, Target, Candidate, Options),
+	    match(Candidate, Match, Options)
+	).
 
 match(align(Source, Target, Prov0), align(Source, Target, [Prov|Prov0]), Options) :-
  	rdf_equal(rdfs:label, DefaultProp),
@@ -48,4 +59,5 @@ match(align(Source, Target, Prov0), align(Source, Target, [Prov|Prov0]), Options
  	Prov = [method(snowball),
  		graph([rdf(Source, SourceProp, literal(lang(SourceLang, SourceLabel))),
 		       rdf(Target, TargetProp, literal(lang(TargetLang, TargetLabel)))])
-	       ].
+	       ],
+	debug(align, 'snowball match: ~p ~p', [Source,Target]).
diff --git a/lib/amalgame/select/select1_1.pl b/lib/amalgame/select/select1_1.pl
new file mode 100644
index 0000000..b35961b
--- /dev/null
+++ b/lib/amalgame/select/select1_1.pl
@@ -0,0 +1,27 @@
+:- module(select1_1,[]).
+
+:- use_module(library(semweb/rdf_db)).
+:- public selecter/5.
+
+%%	selecter(+Source, -Selected, -Discarded, -Undecided, +Options)
+%
+%	Selected: alignments with a unique source; Discarded: the rest; Undecided: [].
+
+selecter(AlignmentGraph, Sel, Dis, [], _Options) :-
+ 	select_(AlignmentGraph, Sel, Dis).
+
+% select_(+Sorted, -Unique, -Ambiguous): alignments of one source must be adjacent.
+select_([], [], []).
+select_([align(S,T,P)|As], Unique, Ambiguous) :-
+	same_source(As, S, Same, Rest),
+	(   Same = []			% S has a single target: select it
+	->  Unique = [align(S,T,P)|Unique1], Ambiguous = Ambiguous1
+	;   Unique = Unique1, append([align(S,T,P)|Same], Ambiguous1, Ambiguous)
+	),
+	select_(Rest, Unique1, Ambiguous1).
+
+% same_source(+As, +S, -Same, -Rest): Same is the leading run of As with source S.
+same_source([align(S,T,P)|As], S, [align(S,T,P)|Same], Rest) :-
+	!,
+	same_source(As, S, Same, Rest).
+same_source(As, _S, [], As).