:- module(ag_stats,
          [ node_stats/4,
            reference_counts/3,
            mapping_stats/4
          ]).

:- use_module(library(apply)).
:- use_module(library(assoc)).
:- use_module(library(debug)).
:- use_module(library(option)).
:- use_module(library(lists)).
:- use_module(library(semweb/rdf11)).
:- use_module(library(semweb/rdfs)).

:- use_module(library(stat_lists)).
:- use_module(library(amalgame/expand_graph)).
:- use_module(library(amalgame/scheme_stats)).
:- use_module(library(amalgame/caching)).
:- use_module(library(amalgame/vocabulary)).
:- use_module(library(amalgame/ag_reference)).
:- use_module(library(amalgame/mapping_graph)).
:- use_module(library(amalgame/correspondence)).
:- use_module(library(amalgame/util)).
:- use_module(library(amalgame/json_util)).

%%  node_stats(+Strategy, +Node, -Stats, +Options) is semidet.
%
%   Stats are the statistics for Node within Strategy. Fails if Node
%   or Options is unbound. Nodes that are an amalgame:'Mapping' are
%   counted as mappings, alignable schemes as schemes; for any other
%   node Stats is the empty list.

node_stats(Strategy, Node, Stats, Options) :-
    nonvar(Node),
    nonvar(Options),
    !,
    (   rdfs_individual_of(Node, amalgame:'Mapping')
    ->  node_counts(mapping, Node, Strategy, Stats, Options)
    ;   amalgame_alignable_scheme(Node)
    ->  node_counts(scheme, Node, Strategy, Stats, Options)
    ;   Stats = []
    ).

%%  node_counts(+Type, +URI, +Strategy, -Stats, +Options) is semidet.
%
%   Counts for the items in the set denoted by URI. Type is `mapping`
%   or `scheme`. The compute(Level) option selects the behaviour:
%
%     * compute(false): only consult the stats cache.
%     * compute(deep): (schemes only) run the labelprop pass and then
%       the depth pass; Stats is the result of the depth pass.
%     * compute(depth) / compute(labelprop): (schemes only) expand the
%       node, then recurse without the compute option while holding a
%       per-level, per-scheme mutex.
%     * compute(true) or no compute option: use the cache if it has an
%       entry, otherwise expand the node under a per-URL mutex.

% Explicit compute(false): never compute, cache lookup only.
% The default `true` does not unify with `false`, so this clause only
% fires when compute(false) is actually present in Options.
node_counts(_, URL, Strategy, Stats, Options) :-
    option(compute(false), Options, true),
    !,
    get_stats_cache(Strategy, URL, Stats).
% NOTE(review): this clause has no cut, so the clauses below remain
% alternatives on backtracking or when one of the two passes fails.
% Confirm whether that fall-through to the cache is intended.
node_counts(scheme, Scheme, Strategy, Stats, Options) :-
    select_option(compute(deep), Options, Options1, true),
    node_counts(scheme, Scheme, Strategy, _Stats,
                [compute(labelprop)|Options1]),
    node_counts(scheme, Scheme, Strategy, Stats,
                [compute(depth)|Options1]).
node_counts(scheme, Scheme, Strategy, Stats, Options) :-
    select_option(compute(Level), Options, Options1, true),
    (   Level == depth
    ;   Level == labelprop
    ),
    !,
    expand_node(Strategy, Scheme, _),
    atomic_list_concat([Level, '_stats_cache_', Scheme], Mutex),
    debug(mutex, 'waiting for deep stats mutex ~w', [Mutex]),
    with_mutex(Mutex,
               node_counts(scheme, Scheme, Strategy, Stats, Options1)).
% Cache hit: no computation needed.
node_counts(_, URL, Strategy, Stats, _Options) :-
    get_stats_cache(Strategy, URL, Stats),
    !.
% Cache miss with compute(true) (also the default): compute under a
% mutex named after the URL.
node_counts(_, URL, Strategy, Stats, Options) :-
    option(compute(true), Options, true),
    !,
    % atom_concat/3 is the SWI-Prolog built-in, as already used by
    % reference_counts/3; atomic_concat/3 is not defined in SWI-Prolog.
    atom_concat(node_counts, URL, Mutex),
    debug(mutex, 'Locking mutex: ~w', [Mutex]),
    with_mutex(Mutex, node_counts_(URL, Strategy, Stats)),
    debug(mutex, 'Releasing mutex: ~w', [Mutex]).

%%  node_counts_(+URL, +Strategy, -Stats) is semidet.
%
%   Expand URL and read the resulting stats from the cache. Succeeds
%   only if the cached Stats is a dict.

node_counts_(URL, Strategy, Stats) :-
    expand_node(Strategy, URL, _Result),    % this should fill the cache
    get_stats_cache(Strategy, URL, Stats),
    is_dict(Stats).

%%  reference_counts(+Id, +Strategy, -Stats) is semidet.
%
%   Stats are the reference-comparison statistics for mapping Id. The
%   lookup/computation runs under a mutex named after Id.

reference_counts(Id, Strategy, Stats) :-
    atom_concat(reference_counts, Id, Mutex),
    with_mutex(Mutex, reference_counts_(Id, Strategy, Stats)).

% Use the `refs` entry of the cached stats when available, otherwise
% compute the reference statistics.
reference_counts_(Id, Strategy, RefStats) :-
    (   get_stats_cache(Strategy, Id, Stats),
        option(refs(RefStats), Stats)
    ->  true
    ;   compute_reference_counts(Id, Strategy, RefStats)
    ).

%%  mapping_stats(+URL, +Mapping, +Strategy, -Stats) is det.
%
%   Stats are statistics for mapping: a dict with tag
%   mapping_stats_dict holding the total number of correspondences,
%   label statistics for the distinct sources and targets, and the
%   vocabulary, structure and input statistics.

mapping_stats(URL, Mapping, Strategy, Stats) :-
    BasicStats = [ totalCount-MN ],
    length(Mapping, MN),
    maplist(correspondence_source, Mapping, Ss0),
    maplist(correspondence_target, Mapping, Ts0),
    sort(Ss0, Ss),                          % distinct source concepts
    sort(Ts0, Ts),                          % distinct target concepts
    compute_label_stats(Ss, SLabelDict),
    compute_label_stats(Ts, TLabelDict),
    vocab_stats(URL, Strategy, Ss, Ts,
                VocStats, StructStats, CarthesianProductSize),
    input_stats(URL, Strategy, Ss, Ts, MN,
                CarthesianProductSize, InputStats),
    append([ BasicStats,
             [ labels - label{source:SLabelDict, target:TLabelDict} ],
             VocStats,
             StructStats,
             InputStats
           ],
           StatsPairs),
    dict_pairs(Stats, mapping_stats_dict, StatsPairs).
%%  input_stats(+URL, +Strategy, +Ss, +Ts, +MN, +CarthesianProductSize,
%%              -InputStats) is semidet.
%
%   InputStats is a list of Key-Value pairs (sourcePercentageInput,
%   targetPercentageInput, inputPercentage) relating mapping URL to
%   its input mappings. Ss and Ts are the distinct sources and targets
%   of the mapping and MN its number of correspondences. If URL has no
%   input mappings, MN is related to CarthesianProductSize instead and
%   the source/target input percentages are 0.
%
%   NOTE(review): save_perc/3 is defined elsewhere; presumably it
%   yields its first argument as a percentage of its second.

input_stats(URL, Strategy, Ss, Ts, MN, CarthesianProductSize, InputStats) :-
    InputStats = [ sourcePercentageInput-SiPerc,
                   targetPercentageInput-TiPerc,
                   inputPercentage-IP
                 ],
    length(Ss, SN),
    length(Ts, TN),
    findall(Input, has_mapping_input(URL, Strategy, Input), Inputs),
    (   Inputs \= []
    ->  maplist(expand_node(Strategy), Inputs, InputMappings),
        append(InputMappings, Merged),
        sort(Merged, Unique),               % union of all input mappings
        maplist(correspondence_source, Unique, Si0),
        maplist(correspondence_target, Unique, Ti0),
        sort(Si0, Si),
        sort(Ti0, Ti),
        length(Unique, IML),
        length(Si, SiN),
        length(Ti, TiN),
        save_perc(MN, IML, IP),
        save_perc(SN, SiN, SiPerc),
        save_perc(TN, TiN, TiPerc)
    ;   save_perc(MN, CarthesianProductSize, IP),
        SiPerc = 0,
        TiPerc = 0
    ).

%%  vocab_stats(+URL, +Strategy, +Ss, +Ts, -VocStats, -StructStats,
%%              -CarthesianProductSize) is semidet.
%
%   VocStats and StructStats are Key-Value pair lists describing how
%   the distinct sources Ss and targets Ts of mapping URL relate to
%   the source and target vocabularies of the mapping.
%   CarthesianProductSize is the product of the totalCount values of
%   both vocabularies. StructStats is empty when the stats of either
%   vocabulary are the empty list.

vocab_stats(URL, Strategy, Ss, Ts, VocStats, StructStats,
            CarthesianProductSize) :-
    mapping_vocab_sources(URL, Strategy, InputS, InputT),
    node_stats(Strategy, InputS, StatsSin, [compute(deep)]),
    node_stats(Strategy, InputT, StatsTin, [compute(deep)]),
    option(totalCount(SourceN), StatsSin, 0),
    option(totalCount(TargetN), StatsTin, 0),
    length(Ss, SN),
    length(Ts, TN),
    save_perc(SN, SourceN, SPerc),
    save_perc(TN, TargetN, TPerc),
    js_focus_node(Strategy, InputS, SvocDict),
    js_focus_node(Strategy, InputT, TvocDict),
    CarthesianProductSize is SourceN * TargetN,
    VocStats = [ vocs-vocs{ source:SvocDict, target:TvocDict },
                 mappedSourceConcepts-SN,
                 mappedTargetConcepts-TN,
                 sourcePercentage-SPerc,
                 targetPercentage-TPerc
               ],
    (   StatsSin \= [],
        StatsTin \= []
    ->  % The depth maps live under the '@private' key of the stats.
        Smap = StatsSin.get('@private').get(depthMap),
        nonvar(StatsTin),
        Tmap = StatsTin.get('@private').get(depthMap),
        structure_stats(depth,    Ss, Smap, DSstats),
        structure_stats(children, Ss, Smap, BSstats),
        structure_stats(depth,    Ts, Tmap, DTstats),
        structure_stats(children, Ts, Tmap, BTstats),
        StructStats = [ source_depth-DSstats,
                        target_depth-DTstats,
                        source_child_stats-BSstats,
                        target_child_stats-BTstats
                      ]
    ;   StructStats = []
    ),
    !.

%%  structure_stats(+Type, +Concepts, +Map, -Stats) is semidet.
%
%   Stats is a dict (tag `stats`) built from the five number summary
%   of the Type values (depth or children) that Map holds for
%   Concepts. For fewer than two concepts Stats is the empty list.
%
%   The two base cases commit, so the general clause is never tried
%   as an alternative for an empty or singleton list.

structure_stats(_, [], _, Stats) :-
    !,
    Stats = [].
structure_stats(_, [_], _, Stats) :-
    !,
    Stats = [].
structure_stats(Type, Concepts, Map, Stats) :-
    maplist(concept_depth(Type, Map), Concepts, Depths),
    msort(Depths, DepthsSorted),
    list_five_number_summary(DepthsSorted, OptionFormat),
    dict_create(Stats, stats, OptionFormat).

%%  concept_depth(+Type, +Map, +Concept, -Value) is det.
%
%   Value is the depth (Type = depth) or the children value
%   (Type = children) stored for Concept in the assoc Map, whose
%   values are Depth-Children pairs. If the lookup fails, Value is -1
%   and a message is sent to the `stats` debug topic.
%
%   A successful lookup commits: without the cuts the fallback clause
%   would also be reachable on backtracking, yielding a spurious -1
%   and a misleading error message for concepts that are in the map.

concept_depth(depth, Map, Concept, Depth) :-
    get_assoc(Concept, Map, Depth-_Children),
    !.
concept_depth(children, Map, Concept, Children) :-
    get_assoc(Concept, Map, _Depth-Children),
    !.
concept_depth(Type, _Map, Concept, -1) :-
    debug(stats, 'ERROR: cannot find ~w for ~p in assoc', [Type, Concept]).

%%  has_mapping_input(+URL, +Strategy, -Input) is nondet.
%
%   Input is an amalgame:'Mapping' that is an amalgame:input of a
%   process that generated URL, where the generating triple is in the
%   Strategy graph.

has_mapping_input(URL, Strategy, Input) :-
    rdf_has(URL, amalgame:wasGeneratedBy, Process, RP),
    rdf(URL, RP, Process, Strategy),
    rdf_has(Process, amalgame:input, Input),
    rdfs_individual_of(Input, amalgame:'Mapping').

%%  compute_reference_counts(+Id, +Strategy, -RefStats) is semidet.
%
%   Compare the correspondences of mapping Id against the reference
%   mappings of Strategy, using the amalgame:default_relation of Id,
%   and store the result under the `refs` key of the cached stats.

compute_reference_counts(Id, Strategy, RefStats) :-
    reference_mappings(Strategy, References),
    expand_node(Strategy, Id, Mappings),
    rdf(Id, amalgame:default_relation, Relation),
    compare_against_ref(Mappings, References, Relation,
                        partition([], [], [], []), RefStats),
    get_stats_cache(Strategy, Id, OldStats),
    put_dict([refs:RefStats], OldStats, NewStats),
    set_stats_cache(Strategy, Id, NewStats).

%%  part_ref_stats(+Partition, -Stats) is det.
%
%   Stats is a refs_stats_dict with, for each of the four lists in
%   Partition = partition(Matches, Conflicts, Unknown, Missing), an
%   atom of the form 'Count (Perc%)'. Matching, conflicting and
%   notInRef are related to the number of found correspondences
%   (matches + conflicts + unknown); missing is related to the number
%   of evaluated ones (matches + conflicts + missing).

part_ref_stats(partition(Matches, Conflicts, Unknown, Missing), Stats) :-
    Stats = refs_stats_dict{ matching:    MLengthS,
                             conflicting: CLengthS,
                             notInRef:    ULengthS,
                             missing:     MisLengthS
                           },
    length(Matches,   MLength),
    length(Conflicts, CLength),
    length(Unknown,   ULength),
    length(Missing,   MisLength),
    TotalFound is MLength + CLength + ULength,
    TotalEval  is MLength + CLength + MisLength,
    save_perc(MLength,   TotalFound, MLengthPerc),
    save_perc(CLength,   TotalFound, CLengthPerc),
    save_perc(ULength,   TotalFound, ULengthPerc),
    save_perc(MisLength, TotalEval,  MisLengthPerc),
    format(atom(MLengthS),   '~d (~2f%)', [MLength,   MLengthPerc]),
    format(atom(CLengthS),   '~d (~2f%)', [CLength,   CLengthPerc]),
    format(atom(ULengthS),   '~d (~2f%)', [ULength,   ULengthPerc]),
    format(atom(MisLengthS), '~d (~2f%)', [MisLength, MisLengthPerc]).
%%  compare_against_ref(+Mappings, +References, +Relation,
%%                      +Partition0, -Stats) is det.
%
%   Walk Mappings and References (lists of align(Source, Target,
%   Provenance) terms) in parallel and sort every correspondence into
%   one of the four lists of partition(Matches, Conflicts, Unknown,
%   Missing):
%
%     * Unknown:   in Mappings but not in References
%     * Missing:   in References but not in Mappings
%     * Matches:   in both, and the reference confirms Relation
%     * Conflicts: in both, but the reference does not confirm it
%
%   Stats is computed from the final partition by part_ref_stats/2.
%
%   NOTE(review): the merge-style walk assumes both lists are sorted
%   in the standard order of terms on source, then target - confirm
%   against the producers of both lists.

% Mappings exhausted: all remaining references are missing. The cut
% prevents a duplicate solution from the next clause when both lists
% run out at the same time.
compare_against_ref([], Missing, _, partition(Ma, Co, Un, Mi), Stats) :-
    !,
    append(Mi, Missing, Mi2),
    part_ref_stats(partition(Ma, Co, Un, Mi2), Stats).
% References exhausted: all remaining mappings are unknown.
compare_against_ref(Unknown, [], _, partition(Ma, Co, Un, Mi), Stats) :-
    !,
    append(Un, Unknown, Un2),
    part_ref_stats(partition(Ma, Co, Un2, Mi), Stats).
compare_against_ref([align(S,T,P)|MT], [align(SR,TR,PR)|RT], Rel,
                    partition(Matches, Conflicts, Unknown, Missing),
                    Stats) :-
    % Comparing the S-T pairs in the standard order of terms compares
    % the sources first and falls back to the targets only when the
    % sources are identical.
    compare(Order, S-T, SR-TR),
    (   Order == (<)
    ->  % Mapping precedes the reference: not in the reference set.
        compare_against_ref(MT, [align(SR,TR,PR)|RT], Rel,
                            partition(Matches, Conflicts,
                                      [align(S,T,P)|Unknown], Missing),
                            Stats)
    ;   Order == (>)
    ->  % Reference precedes the mapping: missing from the mappings.
        compare_against_ref([align(S,T,P)|MT], RT, Rel,
                            partition(Matches, Conflicts, Unknown,
                                      [align(SR,TR,PR)|Missing]),
                            Stats)
    ;   reference_confirms_relation(PR, Rel)
    ->  compare_against_ref(MT, RT, Rel,
                            partition([align(S,T,P)|Matches], Conflicts,
                                      Unknown, Missing),
                            Stats)
    ;   compare_against_ref(MT, RT, Rel,
                            partition(Matches, [align(S,T,P)|Conflicts],
                                      Unknown, Missing),
                            Stats)
    ).

%   reference_confirms_relation(+Provenance, +Rel) is semidet.
%
%   True if some element of the reference Provenance list has method
%   "manual_evaluation" or "preloaded" and carries relation(Rel).

reference_confirms_relation(Provenance, Rel) :-
    member(Manual, Provenance),
    (   member(method("manual_evaluation"), Manual)
    ;   member(method("preloaded"), Manual)
    ),
    option(relation(Rel), Manual),
    !.