amalgame/commit
start with provenance process execution
author | Michiel Hildebrand |
---|---|
Fri Feb 11 20:09:45 2011 +0100 | |
committer | Michiel Hildebrand |
Fri Feb 11 20:09:45 2011 +0100 | |
commit | 25326c90b3db8de4b10bd0df258d5c8dee8b3987 |
tree | 90604e7c185592c1ae4052d40c009f80e2dbdc0d |
parent | 2f7f122888ceafcbf1af8f206c8cad942f4fd7fe |
Diff style: patch stat
diff --git a/lib/amalgame/alignment_graph.pl b/lib/amalgame/alignment_graph.pl index 853f64b..c0b67c6 100644 --- a/lib/amalgame/alignment_graph.pl +++ b/lib/amalgame/alignment_graph.pl @@ -1,19 +1,98 @@ :- module(alignment_graph, - [ graph_member/2, + [ e/2, + flush_map_cache/0, + flush_map_cache/1, + graph_member/2, merge_graphs/2, merge_provenance/2, materialize_alignment_graph/2 ]). :- use_module(library(semweb/rdf_db)). +:- use_module(library(semweb/rdfs)). :- use_module(library(semweb/rdf_persistency)). :- use_module(library(amalgame/edoal)). +:- use_module(library(amalgame/map)). + +% components +:- use_module(library(amalgame/matchers/snowball_match)). +:- use_module(library(amalgame/select/select1_1)). + +:- dynamic + map_cache/2. + +flush_map_cache :- + flush_map_cache(_). +flush_map_cache(Id) :- + retractall(map_cache(Id,_)). + + +e(Id, Mapping) :- + rdf_has(Id, opmv:wasGeneratedBy, Process), + rdf(Process, rdf:type, Type), + do_process(Process, Type, Id, Mapping). + +do_process(Process, Type, Id, Mapping) :- + rdfs_subclass_of(Type, amalgame:'Match'), + !, + rdf(Process, amalgame:source, Source), + rdf(Process, amalgame:target, Target), + process_options(Process, Options), + resource_to_term(Type, Module), + debug(align, 'running ~w matcher', [Module]), + call(Module:matcher, Source, Target, Mapping, Options), + assert(map_cache(Id, Mapping)). + +do_process(Process, Type, Id, Mapping) :- + rdfs_subclass_of(Type, amalgame:'Select'), + !, + rdf(Process, amalgame:source, Source), + process_options(Process, Options), + resource_to_term(Type, Module), + findall(A, graph_member(A, Source), Graph0), + sort(Graph0, Graph), + debug(align, 'running ~w select', [Module]), + call(Module:selecter, Graph, Selected, Discarded, Undecided, Options), + rdf_has(Id, opmv:used, _, P0), + resource_to_term(P0, P), + select_mapping(P, Selected, Discarded, Undecided, Mapping). + +select_mapping(selected, Selected, _, _, Selected). +select_mapping(discarded, _, Discarded, _, Discarded). +select_mapping(undecided, _, _, Undecided, Undecided). + + +%% process_options(+Process, -Options) +% +% + +process_options(Process, Options) :- + rdf(Process, amalgame:parameters, ParamString), + !, + param_string_to_options(ParamString, Options). +process_options(_, []). + +param_string_to_options(S,S). + + + +%% resource_to_term(+RDFResource, -PrologTerm) +% +% Convert Amalgame RDF classes to Prolog predicates. + +resource_to_term(Resource, Term) :- + rdf_global_id(_:Local, Resource), + downcase_atom(Local, Term). + %% graph_member(?Element, ?Graph) % % Enumarate elements of Graph. Where Graph is a list, a skos % scheme URI or a named graph URI. +graph_member(_, Var) :- + var(Var), + !. graph_member(E, List) :- is_list(List), !, @@ -27,6 +106,28 @@ graph_member(E, type(Class)) :- graph_member(E, graph(Graph)) :- !, rdf_has(E, rdf:type, _, Graph). +graph_member(E, Scheme) :- + rdfs_individual_of(Scheme, skos:'ConceptScheme'), + !, + rdf(E, skos:inScheme, Scheme). +graph_member(E, Class) :- + rdfs_individual_of(Class, rdfs:'Class'), + !, + rdfs_individual_of(E, Class). +graph_member(align(S,T,P), MappingId) :- + rdfs_individual_of(MappingId, amalgame:'Mapping'), + ( has_map(_,_,MappingId) + -> has_map([S-T], P, MappingId) + ; map_cache(MappingId, Mapping), + debug(align, 'using cache for ~w', [MappingId]), + member(align(S,T,P), Mapping) + ; e(MappingId, Mapping), + member(align(S,T,P), Mapping) + ). + + + + %% merge_graphs(+ListOfGraphs, -Merged) diff --git a/lib/amalgame/candidate.pl b/lib/amalgame/candidate.pl new file mode 100644 index 0000000..b133528 --- /dev/null +++ b/lib/amalgame/candidate.pl @@ -0,0 +1,19 @@ +:- module(candidate, + [ prefix_candidate/4 + ]). + +:- use_module(library(amalgame/alignment_graph)). +:- use_module(library(semweb/rdf_label)). +:- use_module(library(semweb/rdf_db)). + +prefix_candidate(Source, Target, align(S, T, []), Options) :- + rdf_equal(rdfs:label, DefaultProp), + option(sourcelabel(MatchProp1), Options, DefaultProp), + option(targetlabel(MatchProp2), Options, DefaultProp), + option(prefixLength(PrefixLength), Options, 4), + graph_member(S, Source), + rdf_has(S, MatchProp1, Lit), + literal_text(Lit, Label), + sub_atom(Label, 0, PrefixLength, _, Prefix), + rdf_has(T, MatchProp2, literal(prefix(Prefix), _)), + graph_member(T, Target). diff --git a/lib/amalgame/gtaa_cornetto.pl b/lib/amalgame/gtaa_cornetto.pl index 375ada3..068f4a1 100644 --- a/lib/amalgame/gtaa_cornetto.pl +++ b/lib/amalgame/gtaa_cornetto.pl @@ -31,6 +31,15 @@ :- use_module(library(semweb/rdfs)). + + + + + + + + + align :- debug(align), GTAA_Subjects = scheme('http://data.beeldengeluid.nl/gtaa/Onderwerpen'), diff --git a/lib/amalgame/matchers/snowball_match.pl b/lib/amalgame/matchers/snowball_match.pl index b642748..61bd947 100644 --- a/lib/amalgame/matchers/snowball_match.pl +++ b/lib/amalgame/matchers/snowball_match.pl @@ -5,14 +5,25 @@ :- use_module(library(semweb/rdf_litindex)). :- use_module(library(snowball)). :- use_module(library(lit_distance)). +:- use_module(library(amalgame/alignment_graph)). +:- use_module(library(amalgame/candidate)). -:- public match/3. -:- multifile amalgame:component/2. -amalgame:component(match, snowball(align(uri, uri, provenance), align(uri,uri,provenance), - [sourcelabel(uri, [default(rdfs:label)]), - targetlabel(uri, [default(rdfs:label)]) - ])). +:- public matcher/4. + +matcher(Source, Target, Mappings, Options) :- + findall(A, align(Source, Target, A, Options), Mappings). + +align(Source, Target, Match, Options) :- + option(prefix(0), Options), + !, + graph_member(S, Source), + match(align(S,T,[]), Match, Options), + graph_member(T, Target). + +align(Source, Target, Match, Options) :- + prefix_candidate(Source, Target, Match0, Options), + match(Match0, Match, Options). match(align(Source, Target, Prov0), align(Source, Target, [Prov|Prov0]), Options) :- rdf_equal(rdfs:label, DefaultProp), @@ -48,4 +59,5 @@ match(align(Source, Target, Prov0), align(Source, Target, [Prov|Prov0]), Options Prov = [method(snowball), graph([rdf(Source, SourceProp, literal(lang(SourceLang, SourceLabel))), rdf(Target, TargetProp, literal(lang(TargetLang, TargetLabel)))]) - ]. + ], + debug(align, 'snowball match: ~p ~p', [Source,Target]). diff --git a/lib/amalgame/select/select1_1.pl b/lib/amalgame/select/select1_1.pl new file mode 100644 index 0000000..b35961b --- /dev/null +++ b/lib/amalgame/select/select1_1.pl @@ -0,0 +1,27 @@ +:- module(select1_1,[]). + +:- use_module(library(semweb/rdf_db)). + +:- public selecter/5. + +%% selecter(+Source, -Selected, -Discarded, -Undecided, +Options) +% + +selecter(AlignmentGraph, Sel, Dis, [], _Options) :- + select_(AlignmentGraph, Sel, Dis). + +select_([], [], []). +select_([align(S,T,P)|As], A1, A2) :- + same_source(As, S, Same, Rest), + ( Same = [] + -> A2 = [align(S,T,P)|A2Rest], + A1 = A1Rest + ; append([align(S,T,P)|Same], A1Rest, A1), + A2 = A2Rest + ), + select_(Rest, A1Rest, A2Rest). + +same_source([align(S,T,P)|As], S, [align(S,T,P)|Same], Rest) :- + !, + same_source(As, S, Same, Rest). +same_source(As, _S, [], As).