:- module(exp_stats, [tag_stats/4, find_concepts/0, find_concepts/1, video_tags_to_concepts/3, tag_entries/0, confirm/0 ]). :- use_module(library(semweb/rdf_db)). :- use_module(library(semweb/rdfs)). :- use_module(library(count)). :- use_module(api(reconcile)). :- use_module(library(video_annotation)). :- use_module(library(yaz_util)). :- use_module(library(semweb/rdf_litindex)). :- use_module(library(semweb/rdf_label)). user:file_search_path(exp_data, exp_data). :- dynamic exact_match_cache/1, match_cache/2, tag_concept_link/3. :- rdf_find_literals(zzz, _). /******************************* * tag stats * *******************************/ tag_stats(Total, Unique, Matched, MatchedUnique) :- findall(E, rdf(E, rdf:type, pprime:'TagEntry'), Es), length(Es, Total), findall(T, rdf(T, rdf:type, pprime:'Tag'), Ts), length(Ts, Unique), findall(M-Tag, (rdf(M, rdf:type, pprime:'TagEntry'), rdf(M, rdf:value, Tag), rdf(M, pprime:score, literal(S)), S > 0 ), Ms), length(Ms, Matched), pairs_values(Ms, Tags0), sort(Tags0, Tags), length(Tags, MatchedUnique). /******************************* * tag to concepts * *******************************/ find_concepts :- find_concepts(_). find_concepts(N) :- waisda_tags(Tags), length(Tags, Count), ( var(N) -> Limit = Count ; Limit is min(Count, N) ), open('tag_concept_links.pl', write, Stream, [encoding(utf8)]), find_tag_concepts(Tags, Limit, Stream), close(Stream). find_tag_concepts([], _, _) :- !. find_tag_concepts(_, 0, _) :- !. find_tag_concepts([Tag|Ts], N, Stream) :- N0 is N-1, reconcile(Tag, 100, Hits), maplist(tag_hit_type, Hits, Types0), sort(Types0, Types), Term = tag_concept_link(Tag, Types, Hits), assert(Term), debug(tag_concept, '~w ~w ~w', [N, Tag, Types]), format(Stream, '~q .~n', [Term]), find_tag_concepts(Ts, N0, Stream). tag_hit_type(hit(_,R,_,_), Type) :- c_type(R, Type). c_type(R, person(gtaa_person)) :- rdf(R, skos:inScheme, gtaa:'Persoonsnamen'), !. c_type(R, person(gtaa_naam)) :- rdf(R, skos:inScheme, gtaa:'Namen'), !. c_type(R, person(dbpedia)) :- rdf(R, rdf:type, foaf:'Person'), !. c_type(R, place(gtaa)) :- rdf(R, skos:inScheme, gtaa:'GeografischeNamen'), !. c_type(R, place(geonames)) :- rdf(R, skos:inScheme, gtaa:'Geonames'), !. c_type(R, emotion(cornetto_state)) :- rdf_reachable(R, skos:broader, 'http://purl.org/vocabularies/cornetto/synset-gesteldheid-1-noun'), !. c_type(R, subject(gtaa)) :- rdf(R, skos:inScheme, gtaa:'Onderwerpen'), !. c_type(R, subject(cornetto_noun)) :- rdf(R, rdf:type, cornetto:'NounSynset'), !. c_type(R, subject(cornetto_verb)) :- rdf(R, rdf:type, cornetto:'VerbSynset'), !. c_type(R, other(Class)) :- ( rdf(R, rdf:type, Class) -> true ; Class = unknown ). waisda_tags(Tags) :- findall(Tag, (rdf(T,rdf:type,pprime:'Tag'), rdf(T,rdfs:label,Tag0), tag_value(Tag0, Tag), atom(Tag) ), Tags). tag_concepts :- rdf_equal(skos:'Concept', Type), tag_concepts(Type, [], Pairs), tag_concept_stats(Pairs). tag_cornetto_nouns :- rdf_equal(cornetto:'NounSynset', Type), tag_concepts(Type, [], Pairs), tag_concept_stats(Pairs). tag_gtaa_subjects :- rdf_equal(skos:inScheme, P), rdf_equal(gtaa:'Onderwerpen', Scheme), tag_concepts([], [P-Scheme], Pairs), tag_concept_stats(Pairs). tag_gtaa_namen :- rdf_equal(skos:inScheme, P), rdf_equal(gtaa:'Namen', Scheme), tag_concepts([], [P-Scheme], Pairs), tag_concept_stats(Pairs). tag_gtaa_person :- rdf_equal(skos:inScheme, P), rdf_equal(gtaa:'Persoonsnamen', Scheme), tag_concepts([], [P-Scheme], Pairs), tag_concept_stats(Pairs). tag_dbpedia_person :- tag_concepts('http://xmlns.com/foaf/0.1/Person', [], Pairs), tag_concept_stats(Pairs). tag_gtaa_places :- rdf_equal(skos:inScheme, P), rdf_equal(gtaa:'GeografischeNamen', Scheme), tag_concepts([], [P-Scheme], Pairs), tag_concept_stats(Pairs). tag_concepts(Type, Props, Pairs) :- waisda_tags(Tags), length(Tags, N), tag_concepts_(Tags, N, Type, Props, Pairs). tag_concepts_([], _, _, _, []). tag_concepts_([Tag|Ts], N, Type, Props, [Tag-Concepts|Ps]) :- N0 is N-1, reconcile(Tag, -1, Type, Props, Hits), maplist(hit_concept, Hits, Concepts), length(Concepts, NC), ( NC > 0 -> debug(tag_concept, '~w ~p ~w', [N, Tag, NC]) ; true ), tag_concepts_(Ts, N0, Type, Props, Ps). video_tags_to_concepts(Video, Concepts, Options) :- video_annotations(Video, As0, Options), sort_by_arg(As0, 2, As), rdf_equal(skos:'Concept', SKOSConcept), link_tags_to_concepts(As, SKOSConcept, Concepts). tag_concept_stats(Pairs) :- findall(Count-Tag, (member(Tag-Cs, Pairs), length(Cs,Count) ),Ps), keysort(Ps,Ps1), group_pairs_by_key(Ps1, CountPairs), format_count_pairs(CountPairs). format_count_pairs([]). format_count_pairs([Count-Tags|Cs]) :- length(Tags, N), format('~w, ~w~n', [Count, N]), format_count_pairs(Cs). %% link_tags_to_concepts(+Annotations, -Annotations1) % % Add candidate concepts. link_tags_to_concepts([], _, []). link_tags_to_concepts([A0|As], Type, [A|Rest]) :- A0 = annotation(Tag,Start,End,Entries,Score), A = annotation(Tag,Start,End,Entries,Score,Concepts), tag_value(Tag, Value), ( reconcile(Value, 3, Type, [], Hits) -> maplist(hit_concept, Hits, Concepts) ; Concepts = [] ), link_tags_to_concepts(As, Type, Rest). hit_concept(hit(_,URI,_,Label), concept(URI,Label,Alt,Desc)) :- ( rdf_has(URI, rdfs:label, Lit), literal_text(Lit, Alt), \+ Alt == Label -> true ; Alt = '' ), ( rdf_has(URI,skos:scopeNote,Txt) -> literal_text(Txt,Desc) ; rdf_has(URI, skos:definition,Txt) -> literal_text(Txt,Desc) ; Desc = '' ). /******************************* * Waisda * *******************************/ games(N) :- findall(G,rdf(G, rdf:type, pprime:'Game'),Gs), length(Gs, N). game_stats :- format('n, game, players, tags, unique, scored, unique, extra, unique~n'), multi_user_games(Games), length(Games, Total), game_stats(Games, Total). multi_user_games(Games) :- findall(Game-Us, ( rdf(Game, rdf:type, pprime:'Game'), findall(U, rdf(Game,opmv:wasPerformedBy,U), Us), Us = [_,_|_] ), Games). game_stats([], _). game_stats([Game-Users|Gs], N) :- N0 is N-1, game_matches(Game,As), game_file_name(Game, File0), absolute_file_name(exp_data(File0), File), tell(File), format('game_a(['), write_annotations(As), format(']).'), told, game_annotation_stats(Game, Users, As, N), absolute_file_name(exp_data('stats.csv'), StatsFile), append(StatsFile), game_annotation_stats(Game, Users, As, N), told, game_stats(Gs, N0). game_file_name(G,File) :- rdf_global_id(_:File, G), !. game_file_name(G,G). game_annotation_stats(Game, Users, As, N) :- findall(V, member(annotation(V,_,_,_,_,_), As), Vs0), findall(V, (member(annotation(V,_,_,_,S,_), As),S>0), Ss0), findall(V, (member(annotation(V,_,_,_,S,M),As),S==0,M=[_|_]), Ms0), sort(Vs0, Vs), sort(Ss0, Ss), sort(Ms0, Ms), length(Vs0, NTags), length(Ss0, NScored), length(Ms0, NMatch), length(Vs, NUTags), length(Ss, NUScored), length(Ms, NUMatch), length(Users, NUsers), format('~w, ~p, ~w, ~w, ~w, ~w, ~w, ~w, ~w~n', [N, Game, NUsers, NTags, NUTags, NScored, NUScored, NMatch, NUMatch]). game_matches(Game, As) :- Interval = 10000, rdf(Game, opmv:used, Video), video_annotations(Video, As0, [process(Game)]), sort_by_arg(As0, 2, As1), tag_matches(As1, Game, Interval, As), retractall(exact_match_cache(_)). write_annotations([]). write_annotations([A]) :- !, write_term(A, [quoted]). write_annotations([A|As]) :- write_term(A, [quoted]), format(', '), write_annotations(As). %% tag_entries % % Format the tag number of tag entries per user. tag_entries :- findall(C-V,(rdf(S,rdf:value,V),rdf(S,pprime:creator,C)), Ss), keysort(Ss,Ss1), group_pairs_by_key(Ss1,Gs), forall(member(U-Tags,Gs), (length(Tags,N), format('~w ~w~n',[U,N]))). %% confirm % % Format the number of confirmations per user. confirm :- confirmations(Cs), forall(member(U-C,Cs), (length(C,N), format('~w ~w ',[U,N]), format_action(accept, C), format_action(reject, C), format('~n') )). %% confirm_by_type(+Action) % % Format the number of confirmations for each type of match. confirm_by_type :- confirmations(Cs), forall(member(U-C,Cs), ( length(C,N), format('~w ~w ~n', [U, N]), format_action(accept, C), format_matches([stem,synonym,sibling,related,specific,generic], accept, C), format_action(reject, C), format_matches([stem,synonym,sibling,related,specific,generic], reject, C) )). confirm_by_type(A) :- confirmations(Cs), Matches = [stem,synonym,sibling,related,specific,generic], format_header(Matches), forall(member(_U-C,Cs), format_matches(Matches, A, C)). format_action(A, Cs) :- proof_count(member(confirm(A,_,_,_,_),Cs),_,N), format('~w: ~w ', [A,N]). format_matches([], _, _) :- format('~n'). format_matches([M|Ms], A, Cs) :- proof_count(member(confirm(A,M,_,_,_),Cs),_,N), format('~w, ', [N]), format_matches(Ms, A, Cs). format_header([]) :- format('~n'). format_header([H|Hs]) :- format('~w, ', H), format_header(Hs). confirmations(Cs) :- findall(C-confirm(A,M,Id,SL,TL), ( rdf(Id, pprime:action, literal(A)), rdf(Id, pprime:match, literal(M)), rdf(Id,pprime:creator,C), rdf(Id,pprime:matchSource,S), rdf(Id,pprime:matchTarget,T), rdf(S,rdf:value,literal(SL)), rdf(T,rdf:value,literal(TL))), Cs0), keysort(Cs0,Cs1), group_pairs_by_key(Cs1,Cs). /******************************* * matching * *******************************/ tag_matches([], _, _, []). tag_matches([A0|As], Process, Interval, [A|Rest]) :- rdf_equal(skos:'Concept', Concept), A0 = annotation(Value,Start,End,Entries,Score0), A = annotation(Value,Start,End,Entries,Score,Matches), Time is Start + Interval, Entries = [i(Id,_)], rdf(Id,pprime:creator,User), check_score_within_game(Id, As, Value, User, Time, Score0, Score), tag_value(Value, Tag), snowball(dutch, Tag, Stem0), downcase_atom(Stem0, Stem), reconcile(Tag, 10, Concept, [], Hits), findall(M, backward_match(Id, M), BMatches), forward_matches(As, Id, Tag, Stem, Hits, User, Time, FMatches), append(BMatches, FMatches, Matches), tag_matches(As, Process, Interval, Rest). check_score_within_game(_,_, _, _, _, 0, 0) :- !. check_score_within_game(Id, _, _, _, _, Score, Score) :- exact_match_cache(Id), !. check_score_within_game(_Id, As, Value, User, End, Score, Score) :- match_in_game(As, Value, User, End, Score, Score), !. check_score_within_game(_, _, _, _, _, _, 0). match_in_game([A|As], Value, User, End, Score, Score) :- A = annotation(Value1,Time1,_,[i(Id1,_)],_), Time1 =< End, ( Value1 = Value, \+ rdf(Id1, pprime:creator, User) -> assert(exact_match_cache(Id1)) ; match_in_game(As, Value, User, End, Score, Score) ). backward_match(Id, M) :- match_cache(Id, M), retractall(match_cache(Id,M)). forward_matches([A|As], Id, Tag, Stem, Hits, User, End, Matches) :- A = annotation(Value1,Time1,_,[i(Id1,_)],_), Time1 =< End, !, ( \+ rdf(Id1, pprime:creator, User), tag_value(Value1, Tag1), \+ Tag == Tag1, match(Stem, Hits, Tag1, Type) -> match_reverse(Type, RType), assert(match_cache(Id1, match(RType, 0, Id, literal(Tag)))), Matches = [match(Type, 0, Id1, literal(Tag1))|Ms] ; Matches = Ms ), forward_matches(As, Id, Tag, Stem, Hits, User, End, Ms). forward_matches(_, _, _, _, _, _, _, []). match(Stem, _, Tag1, stem) :- snowball(dutch, Tag1, Stem1), downcase_atom(Stem1, Stem). match(_Stem, Hits, Tag1, Type) :- Hits = [_|_], reconcile(Tag1, 5, Hits1), member(hit(_,C,_,_), Hits), member(hit(_,C1,_,_), Hits1), tag_concept_match(C, C1, Type). tag_concept_match(R, R, synonym) :- !. tag_concept_match(R1, R2, specific) :- rdf_reachable(R1, skos:broader, R2), !. tag_concept_match(R1, R2, generic) :- rdf_reachable(R2, skos:broader, R1), !. tag_concept_match(R1, R2, sibling) :- rdf_reachable(R1, skos:broader, R, 2, _), rdf_reachable(R2, skos:broader, R, 2, _), !. tag_concept_match(R1, R2, related) :- ( rdf(R2, skos:related, R1) ; rdf(R1, skos:related, R2) ), !. tag_value(literal(Tag), Tag). tag_value(uri(_URI,Tag), Tag). match_reverse(specific, generic). match_reverse(generic, specific). match_reverse(M, M).