amalgame/commit
MISC: refactoring, improve caching logic. Still work in progress
author | Jacco van Ossenbruggen |
---|---|
Tue Sep 16 18:48:31 2014 +0200 | |
committer | Jacco van Ossenbruggen |
Tue Sep 16 18:48:31 2014 +0200 | |
commit | 3abaa70a87bd0473dd5aae6470f00c6f4321bead |
tree | c0dfeac6d238cc181a7f3f97d3cbe3e535799ecf |
parent | be349cd20591765282f9add5ce1bc8a55e12e04d |
Diff style: patch stat
diff --git a/applications/builder.pl b/applications/builder.pl index b9d4213..a6e4dc8 100644 --- a/applications/builder.pl +++ b/applications/builder.pl @@ -81,14 +81,14 @@ precalc_voc_stats(Strategy) :- % handy to know how many concepts etc are in each vocab, % both for the user as for the hints system etc. forall(strategy_vocabulary(Strategy, Vocab), - ( node_stats(Strategy, Vocab, Stats, [compute(false)]), - option(totalCount(_), Stats) - -> ( setting(amalgame:precompute, true) + ( node_stats(Strategy, Vocab, Stats, [compute(false)]) + -> option(totalCount(N), Stats), + print_message(informational, + map(found, 'SKOS Concepts for ', Vocab, N)) + ; ( setting(amalgame:precompute, true) -> precompute_node(Strategy, Vocab) ; true ) - ; print_message(informational, - map(found, 'No SKOS Concepts for ', Vocab, 0)) ) ). diff --git a/applications/startpage.pl b/applications/startpage.pl index ef6687b..2930730 100644 --- a/applications/startpage.pl +++ b/applications/startpage.pl @@ -9,7 +9,7 @@ :- use_module(library(http/html_write)). :- use_module(library(yui3_beta)). -:- use_module(library(amalgame/util)). +:- use_module(library(amalgame/vocabulary)). :- use_module(library(amalgame/ag_strategy)). :- use_module(applications(skos_browser)). diff --git a/lib/ag_drivers/exec_amalgame_process.pl b/lib/ag_drivers/exec_amalgame_process.pl index 32c1a7a..d836aff 100644 --- a/lib/ag_drivers/exec_amalgame_process.pl +++ b/lib/ag_drivers/exec_amalgame_process.pl @@ -109,7 +109,7 @@ exec_amalgame_process(Type, Process, Strategy, Module, MapSpec, Time, Options) : collect_snd_input(Process, Strategy, SecInput), rdf(Process, amalgame:source, Source, Strategy), rdf(Process, amalgame:target, Target, Strategy), - vocab_spec(Strategy, Source, SourceSpec), + expand_node(Strategy, Source, SourceSpec), vocab_spec(Strategy, Target, TargetSpec), timed_call(Module:matcher(SourceSpec, TargetSpec, Mapping0, [snd_input(SecInput)|Options]), Time), diff --git a/lib/ag_modules/compound_label_generator.pl b/lib/ag_modules/compound_label_generator.pl index d2b87a3..f6dc963 100644 --- a/lib/ag_modules/compound_label_generator.pl +++ b/lib/ag_modules/compound_label_generator.pl @@ -3,7 +3,6 @@ :- use_module(library(semweb/rdf_db)). :- use_module(library(amalgame/vocabulary)). -:- use_module(library(amalgame/ag_strategy)). :- use_module(string_match_util). :- use_module(compound_label_match). @@ -22,7 +21,7 @@ parameter(targetlabel, oneof(LabelProps), Default, rdf_equal(Default, rdfs:label), label_list(LabelProps). parameter(source_language, oneof(['any'|L]), 'any', 'Language of source label') :- - strategy_languages(_,L). + amalgame_vocabulary_languages(L). parameter(matchacross_lang, boolean, true, 'Allow labels from different language to be matched'). parameter(matchacross_type, boolean, true, diff --git a/lib/ag_modules/compound_label_selecter.pl b/lib/ag_modules/compound_label_selecter.pl index 726f6d8..4ce4430 100644 --- a/lib/ag_modules/compound_label_selecter.pl +++ b/lib/ag_modules/compound_label_selecter.pl @@ -7,6 +7,7 @@ :- use_module(library(amalgame/ag_strategy)). :- use_module(library(semweb/rdf_db)). +:- use_module(library(amalgame/vocabulary)). :- use_module(label_selecter). :- use_module(compound_label_match). :- use_module(string_match_util). @@ -25,7 +26,7 @@ parameter(targetlabel, oneof(LabelProps), Default, label_list(LabelProps). parameter(source_language, oneof(['any'|L]), 'any', 'Language of source label') :- - strategy_languages(_S,L). + amalgame_vocabulary_languages(L). parameter(matchacross_lang, boolean, true, 'Allow labels from different language to be matched'). parameter(matchacross_type, boolean, true, diff --git a/lib/ag_modules/exact_label_generator.pl b/lib/ag_modules/exact_label_generator.pl index 4168cff..4091797 100644 --- a/lib/ag_modules/exact_label_generator.pl +++ b/lib/ag_modules/exact_label_generator.pl @@ -1,8 +1,8 @@ :- module(exact_label_generator, []). +:- use_module(library(lists)). :- use_module(library(semweb/rdf_db)). :- use_module(library(amalgame/vocabulary)). -:- use_module(library(amalgame/ag_strategy)). :- use_module(exact_label_match). :- use_module(string_match_util). @@ -23,7 +23,7 @@ parameter(targetlabel, oneof(LabelProps), Default, label_list(LabelProps). parameter(source_language, oneof(['any'|L]), 'any', 'Language of source label') :- - strategy_languages(_S,L). + amalgame_vocabulary_languages(L). parameter(matchacross_lang, boolean, true, 'Allow labels from different language to be matched'). parameter(matchacross_type, boolean, true, @@ -39,9 +39,9 @@ parameter(match_qualified_only, boolean, false, % and Target. matcher(Source, Target, Mappings, Options) :- - findall(M, align(Source, Target, M, Options), Mappings0), + profile(findall(M, align(Source, Target, M, Options), Mappings0)), sort(Mappings0, Mappings). align(Source, Target, Match, Options) :- - vocab_member(S, Source), + member(S, Source), exact_label_match(align(S,_,[]), Match, [target_scheme(Target)|Options]). diff --git a/lib/ag_modules/exact_label_match.pl b/lib/ag_modules/exact_label_match.pl index a64a79d..401ebb4 100644 --- a/lib/ag_modules/exact_label_match.pl +++ b/lib/ag_modules/exact_label_match.pl @@ -47,6 +47,7 @@ exact_label_match(align(Source, Target, Prov0), SourceLabel \= '', skos_match(Target, MatchPropT, SearchTarget, TargetProp, Options), + Source \= Target, ( option(target_scheme(TargetScheme), Options) -> vocab_member(Target, TargetScheme) diff --git a/lib/ag_modules/exact_label_selecter.pl b/lib/ag_modules/exact_label_selecter.pl index 155e154..aec4c51 100644 --- a/lib/ag_modules/exact_label_selecter.pl +++ b/lib/ag_modules/exact_label_selecter.pl @@ -5,7 +5,7 @@ :- public selecter/5. :- public parameter/4. -:- use_module(library(amalgame/ag_strategy)). +:- use_module(library(amalgame/vocabulary)). :- use_module(library(semweb/rdf_db)). :- use_module(label_selecter). :- use_module(exact_label_match). @@ -25,7 +25,7 @@ parameter(targetlabel, oneof(LabelProps), Default, label_list(LabelProps). parameter(source_language, oneof(['any'|L]), 'any', 'Language of source label') :- - strategy_languages(_S,L). + amalgame_vocabulary_languages(L). parameter(matchacross_lang, boolean, true, 'Allow labels from different language to be matched'). parameter(matchacross_type, boolean, true, diff --git a/lib/ag_modules/isub_generator.pl b/lib/ag_modules/isub_generator.pl index 1c336bc..cc71420 100644 --- a/lib/ag_modules/isub_generator.pl +++ b/lib/ag_modules/isub_generator.pl @@ -3,7 +3,6 @@ :- use_module(library(semweb/rdf_db)). :- use_module(library(amalgame/vocabulary)). -:- use_module(library(amalgame/ag_strategy)). :- use_module(isub_match). :- use_module(string_match_util). @@ -25,7 +24,7 @@ parameter(targetlabel, oneof(LabelProps), Default, parameter(threshold, float, 0.7, 'threshold edit distance'). parameter(language, oneof(['any'|L]), 'any', 'Language of source label') :- - strategy_languages(_,L). + amalgame_vocabulary_languages(L). parameter(matchacross_lang, boolean, true, 'Allow labels from different language to be matched'). diff --git a/lib/ag_modules/isub_selecter.pl b/lib/ag_modules/isub_selecter.pl index b02ee08..d0b6325 100644 --- a/lib/ag_modules/isub_selecter.pl +++ b/lib/ag_modules/isub_selecter.pl @@ -5,7 +5,7 @@ :- public selecter/5. :- public parameter/4. -:- use_module(library(amalgame/ag_strategy)). +:- use_module(library(amalgame/vocabulary)). :- use_module(library(semweb/rdf_db)). :- use_module(label_selecter). :- use_module(string_match_util). @@ -28,7 +28,7 @@ parameter(targetlabel, oneof(LabelProps), Default, parameter(threshold, float, 0.7, 'threshold edit distance'). parameter(language, oneof(['any'|L]), 'any', 'Language of source label') :- - strategy_languages(_,L). + amalgame_vocabulary_languages(L). parameter(matchacross_lang, boolean, true, 'Allow labels from different language to be matched'). diff --git a/lib/ag_modules/snowball_label_generator.pl b/lib/ag_modules/snowball_label_generator.pl index 4516f53..eb2b5bd 100644 --- a/lib/ag_modules/snowball_label_generator.pl +++ b/lib/ag_modules/snowball_label_generator.pl @@ -4,7 +4,6 @@ :- use_module(library(semweb/rdf_db)). :- use_module(library(snowball)). :- use_module(library(amalgame/vocabulary)). -:- use_module(library(amalgame/ag_strategy)). :- use_module(string_match_util). :- use_module(snowball_label_match). @@ -24,7 +23,7 @@ parameter(targetlabel, oneof(LabelProps), Default, rdf_equal(Default, rdfs:label), label_list(LabelProps). parameter(source_language, oneof(['any'|L]), 'any', 'Language of source label') :- - strategy_languages(_,L). + amalgame_vocabulary_languages(L). parameter(matchacross_lang, boolean, true, 'Allow labels from different language to be matched'). parameter(snowball_language, oneof(Languages), english, diff --git a/lib/ag_modules/snowball_label_selecter.pl b/lib/ag_modules/snowball_label_selecter.pl index 3e5a938..31cd261 100644 --- a/lib/ag_modules/snowball_label_selecter.pl +++ b/lib/ag_modules/snowball_label_selecter.pl @@ -1,7 +1,7 @@ :- module(snowball_label_selecter, []). -:- use_module(library(amalgame/ag_strategy)). +:- use_module(library(amalgame/vocabulary)). :- use_module(library(semweb/rdf_db)). :- use_module(library(snowball)). :- use_module(string_match_util). @@ -27,7 +27,7 @@ parameter(targetlabel, oneof(LabelProps), Default, rdf_equal(Default, rdfs:label), label_list(LabelProps). parameter(source_language, oneof(['any'|L]), 'any', 'Language of source label') :- - strategy_languages(_,L). + amalgame_vocabulary_languages(L). parameter(matchacross_lang, boolean, true, 'Allow labels from different language to be matched'). parameter(snowball_language, oneof(Languages), english, diff --git a/lib/amalgame/ag_stats.pl b/lib/amalgame/ag_stats.pl index 5f14f4f..911e8b7 100644 --- a/lib/amalgame/ag_stats.pl +++ b/lib/amalgame/ag_stats.pl @@ -1,8 +1,7 @@ :- module(ag_stats,[ node_stats/4, reference_counts/3, - mapping_stats/4, - scheme_stats/4 + mapping_stats/4 ]). :- use_module(library(option)). @@ -34,11 +33,13 @@ node_stats(Strategy, Node, Stats, Options) :- % % Counts for the items in the set denoted by URI. -node_counts(URL, Strategy, Stats, Options) :- +node_counts(URL, Strategy, Stats, _Options) :- + stats_cache(URL-Strategy, Stats), + !. +node_counts(_URL, _Strategy, _Stats, Options) :- option(compute(false), Options, true), !, - stats_cache(URL-Strategy, Stats), - is_dict(Stats). + fail. node_counts(URL, Strategy, Stats, Options) :- option(compute(true), Options, true), diff --git a/lib/amalgame/caching.pl b/lib/amalgame/caching.pl index 6e8253d..c0ae943 100644 --- a/lib/amalgame/caching.pl +++ b/lib/amalgame/caching.pl @@ -24,7 +24,7 @@ :- dynamic expand_cache/2, - mapped_concepts_cache/1, + mapped_concepts_cache/4, stats_cache/2. :- setting(amalgame:cache_time, float, 0.0, @@ -51,7 +51,7 @@ flush_stats_cache(Strategy) :- flush_stats_cache(_Mapping, Strategy). flush_stats_cache(Mapping, Strategy) :- - retractall(mapped_concepts_cache(m(Strategy, _, Mapping, _))), + retractall(mapped_concepts_cache(Strategy, _, Mapping, _)), retractall(stats_cache(Mapping-Strategy,_)). flush_refs_cache(Strategy) :- @@ -82,10 +82,9 @@ cache_result(ExecTime, Process, Strategy, Result) :- cache_mapped_concepts(Strategy, Type, Mapping, Concepts) :- var(Concepts),!, - mapped_concepts_cache(m(Strategy, Type, Mapping, Concepts)). + mapped_concepts_cache(Strategy, Type, Mapping, Concepts). cache_mapped_concepts(Strategy, Type, Mapping, Sorted) :- - ground(Sorted),!, - assert(mapped_concepts_cache(m(Strategy, Type, Mapping, Sorted))). + assert(mapped_concepts_cache(Strategy, Type, Mapping, Sorted)). clean_repository :- debug(ag_expand, 'Deleting all graphs made by amalgame, including strategies!', []), diff --git a/lib/amalgame/expand_graph.pl b/lib/amalgame/expand_graph.pl index 0ae3a6e..e14142d 100644 --- a/lib/amalgame/expand_graph.pl +++ b/lib/amalgame/expand_graph.pl @@ -3,14 +3,12 @@ vocab_spec/3, precompute_process/2, precompute_node/2, - all_mapped/4, - is_mapped/4 + all_mapped/4 ]). :- use_module(library(apply)). :- use_module(library(debug)). :- use_module(library(lists)). -:- use_module(library(ordsets)). :- use_module(library(semweb/rdf_db)). :- use_module(library(semweb/rdfs)). @@ -71,6 +69,7 @@ precompute_node(Strategy, Mapping) :- % True if Concepts are all sources/targets in the correspondences % of Mapping. Type is either source or target. all_mapped(Strategy, Type, Mapping, Concepts) :- + atom(Mapping), ( cache_mapped_concepts(Strategy, Type, Mapping, Concepts) -> true ; expand_node(Strategy, Mapping, Result), @@ -79,19 +78,19 @@ all_mapped(Strategy, Type, Mapping, Concepts) :- cache_mapped_concepts(Strategy, Type, Mapping, Sorted) ). -%% is_mapped(+Strategy, +Type, +Concept, +Mapping) is semidet. -% -% True if Concept is a source/target in a correspondence in -% Mapping. Type is either source or target. -is_mapped(Strategy, Type, Concept, Mapping) :- - ( cache_mapped_concepts(Strategy, Type, Mapping, Concepts) +all_mapped(Strategy, Type, Mappings, Concepts) :- + is_list(Mappings), + ( cache_mapped_concepts(Strategy, Type, Mappings, Concepts) -> true - ; expand_node(Strategy, Mapping, Result), - maplist(correspondence_element(Type), Result, Concepts), - sort(Concepts, Sorted), - cache_mapped_concepts(Strategy, Type, Mapping, Sorted) - ), - ord_memberchk(Concept, Concepts). + ; maplist(expand_node(Strategy), Mappings, Results), + append(Results, Result), + maplist(my_correspondence_element(Type), Result, Concepts0), + ord_list_to_rbtree(Concepts0, Concepts), + cache_mapped_concepts(Strategy, Type, Mappings, Concepts) + ). + +my_correspondence_element(Type, Align3, E-t) :- + correspondence_element(Type, Align3, E). expand_node_(Strategy, Id, Result) :- % Try if we get this result from the expand_cache first: @@ -189,6 +188,7 @@ expand_vocab(Strategy, Id, Concepts) :- expand_vocab(Strategy, Vocab, List) :- findall(C, skos_in_scheme(Vocab, C), List), + debug(ag_expand, 'Concepts of ~p computed and cached', [Vocab]), cache_result(_, Vocab, Strategy, List). vocab_spec(Strategy, Id, Spec) :- diff --git a/lib/amalgame/hooks/skos_browser.pl b/lib/amalgame/hooks/skos_browser.pl index 2c67502..7943094 100644 --- a/lib/amalgame/hooks/skos_browser.pl +++ b/lib/amalgame/hooks/skos_browser.pl @@ -1,4 +1,4 @@ -:- module(skos_broser_hooks, []). +:- module(skos_browser_hooks, []). :- use_module(library(option)). :- use_module(library(semweb/rdf_db)). @@ -42,11 +42,7 @@ mapped_chk([C|T], Graphs, [C|Rest], Options) :- mapped_chk([_|T], Graphs, Rest, Options) :- mapped_chk(T, Graphs, Rest, Options). -is_mapped(Concept, Mappings, Options) :- - option(strategy(Strategy), Options), - member(Mapping, Mappings), - ( is_mapped(Strategy, source, Concept, Mapping) - -> true - ; is_mapped(Strategy, target, Concept, Mapping) - ). +is_mapped(_Concept, _Mappings, Options) :- + option(strategy(_Strategy), Options), + fail. diff --git a/lib/amalgame/scheme_stats.pl b/lib/amalgame/scheme_stats.pl index 071229e..dec1231 100644 --- a/lib/amalgame/scheme_stats.pl +++ b/lib/amalgame/scheme_stats.pl @@ -33,6 +33,7 @@ scheme_stats(Scheme, Concepts, Strategy, Stats) :- totalLabelCount: TotalLabelCount, uniqueLabelCount: UniqueLabelCount }, + debug(scheme_stats, 'Computing stats for ~p', [Scheme]), ( skos_in_scheme(Scheme, _) -> Virtual = false ; strategy_process_entity(Strategy, _ ,Scheme) diff --git a/lib/amalgame/util.pl b/lib/amalgame/util.pl index 03a096f..95e4085 100644 --- a/lib/amalgame/util.pl +++ b/lib/amalgame/util.pl @@ -1,7 +1,5 @@ :- module(ag_utils, [ mint_node_uri/3, - amalgame_alignable_schemes/1, - assert_user_provenance/2, now_xsd/1, @@ -17,8 +15,6 @@ :- use_module(library(semweb/rdf_db)). :- use_module(user(user_db)). -:- use_module(library(amalgame/rdf_util)). -:- use_module(library(skos/util)). %% mint_node_uri(+Strategy, +Type, -URI) is det. @@ -41,28 +37,6 @@ mint_node_uri(Strategy, Type, URI) :- \+ rdf_graph(URI), !. -%% amalgame_alignable_schemes(-Schemes) is det. -% -% Schemes is unified with a sorted list of urls of -% skos:ConceptSchemes or other alignable objects. -% -% Sorting is based on case insensitive scheme labels. - -amalgame_alignable_schemes(Schemes) :- - findall(S, alignable_scheme(S), All), - maplist(scheme_label, All, Labeled), - keysort(Labeled, Sorted), - pairs_values(Sorted, Schemes). - -alignable_scheme(S) :- - skos_is_vocabulary(S), - skos_in_scheme_chk(S, _). -skos_in_scheme_chk(Scheme, Concept) :- - skos_in_scheme(Scheme, Concept), !. -scheme_label(URI, Key-URI) :- - rdf_graph_label(URI, CasedKey), - downcase_atom(CasedKey, Key). - has_write_permission :- logged_on(User, anonymous), diff --git a/lib/amalgame/vocabulary.pl b/lib/amalgame/vocabulary.pl index 4f3cd40..4c69782 100644 --- a/lib/amalgame/vocabulary.pl +++ b/lib/amalgame/vocabulary.pl @@ -1,6 +1,8 @@ :- module(vocab, [ vocab_member/2, - all_vocab_members/2 + all_vocab_members/2, + amalgame_alignable_schemes/1, + amalgame_vocabulary_languages/1 ]). :- use_module(library(apply)). @@ -12,6 +14,7 @@ :- use_module(library(skos/util)). :- use_module(ag_stats). :- use_module(expand_graph). % for virtual vocab schemes +:- use_module(rdf_util). %% vocab_member(?C, +VocabDef) % @@ -81,9 +84,9 @@ vocab_member(E, is_mapped(Options)) :- option(snd_input(Mappings), Options), option(type(Type), Options), option(strategy(Strategy), Options), - member(Mapping, Mappings), - is_mapped(Strategy, Type, E, Mapping), - !. + all_mapped(Strategy, Type, Mappings, Concepts), + !, + rb_in(E, _, Concepts). vocab_member(F, 'http://sws.geonames.org/') :- !, @@ -154,3 +157,37 @@ all_vocab_members(vscheme(Scheme), Concepts) :- all_vocab_members(VocSpec, Concepts) :- findall(C, vocab_member(C, VocSpec), Concepts0), sort(Concepts0, Concepts). + +%% amalgame_alignable_schemes(-Schemes) is det. +% +% Schemes is unified with a sorted list of urls of +% skos:ConceptSchemes or other alignable objects. +% +% Sorting is based on case insensitive scheme labels. + +amalgame_alignable_schemes(Schemes) :- + findall(S, alignable_scheme(S), All), + maplist(scheme_label, All, Labeled), + keysort(Labeled, Sorted), + pairs_values(Sorted, Schemes). + +alignable_scheme(S) :- + skos_is_vocabulary(S), + skos_in_scheme_chk(S, _). +skos_in_scheme_chk(Scheme, Concept) :- + skos_in_scheme(Scheme, Concept), !. +scheme_label(URI, Key-URI) :- + rdf_graph_label(URI, CasedKey), + downcase_atom(CasedKey, Key). + +amalgame_vocabulary_languages(Languages) :- + amalgame_alignable_schemes(Schemes), + maplist(lang_used(_Strategy), Schemes, Langs), + append(Langs, Languages0), + sort(Languages0, Languages). + +lang_used(Strategy, Voc, Langs) :- + node_stats(Strategy, Voc, Stats, []), + option(languages(Langs), Stats). + +