:- module(tag_concept, [tag_concept/3, video_tags/2, video_concepts/3, derived_concepts/4, m_video/1, baseline_video/1, video_mbh_terms/2, topN/4 ]). :- use_module(library(semweb/rdf_db)). :- use_module(library(semweb/rdf_label)). :- use_module(library(semweb/rdfs)). :- use_module(library(yaz_util)). :- use_module(library(tfidf)). :- use_module(library(find_resource)). :- use_module(library(video_annotation)). :- use_module(library(stop_words)). :- rdf_meta derived_concepts(+, :, r, -), video_concepts(r, r, -). /******************************* * experiment * *******************************/ baseline_video(V) :- rdf(V, rdf:type, pprime:'Video'), once(rdf(V, dc:subject, _, mbh_tag)). derived_concepts(Tags, Goal, Scheme, Concepts) :- findall(C, (member(_Score-Tag, Tags), call(Goal, Tag, C), once(rdf(C, skos:inScheme, Scheme)) ), Concepts0), sort(Concepts0, Concepts). derived_ranked_concepts(Tags, Goal, Scheme, Ranked) :- findall(C-Tag, (member(_-Tag, Tags), call(Goal, Tag, C), once(rdf(C, skos:inScheme, Scheme)) ), Pairs0), keysort(Pairs0, Pairs), group_pairs_by_key(Pairs, Groups), pairs_sort_by_value_count(Groups, Ranked). tag_concept(exact, Tag, Concept) :- rdf_has(Concept, rdfs:label, literal(exact(Tag),_)), once(rdfs_individual_of(Concept, skos:'Concept')). tag_concept(Stem, Tag, Concept) :- ( Stem = stem(Diff) -> true ; Stem = stem -> Diff = 0.9 ), snowball(dutch, Tag, TagStem), %sub_atom(TagStem, 0, _, 1, Prefix), rdf_has(Concept,rdfs:label, literal(prefix(TagStem), Lit)), once(rdfs_individual_of(Concept, skos:'Concept')), literal_text(Lit, Label), ( Label = Tag -> true ; snowball(dutch, Label, LabelStem), find_resource:literal_distance(TagStem, LabelStem, D), %isub(TagStem, Label, true, Sim), D < 4 %Sim > Diff. ). tag_concept(sub, Tag, Concept) :- length(Results, 3), snowball(dutch, Tag, TagStem), find_resource_by_name(TagStem, Hits, [match(prefix),distance(true)]), length(Hits, N), ( N =< 3 -> Results = Hits ; append(Results, _, Hits) ), member(hit(_D,Concept,_,_), Hits), once(rdfs_individual_of(Concept, skos:'Concept')). tag_related_concept(StringMatch, Tag, Concept) :- tag_concept(StringMatch, Tag, C1), ( Concept = C1 ; related(C1, Concept) ). tag_tree_concept(StringMatch, Tag, Concept) :- tag_concept(StringMatch, Tag, C1), ( Concept = C1 ; tree(C1, Concept) ). tag_tree_and_related_concept(StringMatch, Tag, Concept) :- tag_concept(StringMatch, Tag, C1), ( C1 = Concept ; related(C1, Concept) ; tree(C1, Concept) ). tag_tree_related_concept(StringMatch, Tag, Concept) :- tag_concept(StringMatch, Tag, C1), ( C1 = Concept ; related_tree(C1, Concept) ; tree_related(C1, Concept) ). tag_tree_related_sibbling_concept(StringMatch, Tag, Concept) :- tag_concept(StringMatch, Tag, C1), ( C1 = Concept ; related_tree(C1, Concept) ; tree_related(C1, Concept) ; sibbling(C1, Concept) ). related_tree(C, Concept) :- related(C, C1), ( C1 = Concept ; tree(C1, Concept) ). tree_related(C, Concept) :- tree(C, C1), ( C1 = Concept ; related(C1, Concept) ). tree(C1, C2) :- ( rdf_reachable(C1, skos:broader, C2) ; rdf_reachable(C2, skos:broader, C1) ). sibbling(C1, C2) :- rdf(C1, skos:broader, P), rdf(C2, skos:broader, P). related(C1, C2) :- rdf(C1, skos:related, C2). baseline_eval([], _, _, []). baseline_eval([C|Cs], Video, Scheme, [C|Rest]) :- rdf(Video, dc:subject, C), rdf(C, skos:inScheme, Scheme), !, baseline_eval(Cs, Video, Scheme, Rest). baseline_eval([_|Cs], Video, Scheme, Rest) :- baseline_eval(Cs, Video, Scheme, Rest). stats_table(Type) :- scheme_alias(Type, Scheme), format('video\tt\tt_st\tc_ex\tc_st\tc_st_r\tbase~n',[]), ( baseline_video(V), rdf(V, dc:id, literal(Id)), video_concepts(V, Scheme, Concepts), findall(1-T, video_tag(V, literal(T)), Tags), remove_stop_words(Tags, dutch, Tags1), derived_concepts(Tags1, tag_concept(exact), Scheme, I_ExactMatch), derived_concepts(Tags1, tag_concept(stem), Scheme, I_StemMatch), derived_concepts(Tags1, tag_tree_and_related_concept(stem), Scheme, I_StemTR), length(Concepts, ConceptCount), length(Tags, TagCount), length(Tags1, TagFilteredCount), length(I_ExactMatch, ExactCount), length(I_StemMatch, StemCount), length(I_StemTR, StemTRCount), format('~w,\t~w,\t~w,\t~w,\t~w,\t~w,\t~w\t', [Id, TagCount, TagFilteredCount, ExactCount, StemCount, StemTRCount, ConceptCount]), concept_eval(V, Tags1, Scheme, tag_concept(exact)), concept_eval(V, Tags1, Scheme, tag_concept(stem)), concept_eval(V, Tags1, Scheme, tag_tree_and_related_concept(stem)), format('~n'), fail ; true ). eval_table(Type) :- scheme_alias(Type, Scheme), ( baseline_video(V), rdf(V, dc:id, literal(Id)), findall(1-T, video_tag(V, literal(T)), Tags), remove_stop_words(Tags, dutch, Tags1), video_concepts(V, Scheme, Concepts), length(Tags1, TagCount), length(Concepts, ConceptCount), format('~w, ~w, ~w, ', [Id, TagCount, ConceptCount]), concept_eval(V, Tags1, Scheme, tag_concept(exact)), concept_eval(V, Tags1, Scheme, tag_related_concept(exact)), concept_eval(V, Tags1, Scheme, tag_tree_concept(exact)), concept_eval(V, Tags1, Scheme, tag_tree_related_concept(exact)), concept_eval(V, Tags1, Scheme, tag_tree_and_related_concept(exact)), concept_eval(V, Tags1, Scheme, tag_tree_related_sibbling_concept(exact)), format(' '), concept_eval(V, Tags1, Scheme, tag_concept(stem)), concept_eval(V, Tags1, Scheme, tag_related_concept(stem)), concept_eval(V, Tags1, Scheme, tag_tree_concept(stem)), concept_eval(V, Tags1, Scheme, tag_tree_and_related_concept(stem)), concept_eval(V, Tags1, Scheme, tag_tree_related_concept(stem)), concept_eval(V, Tags1, Scheme, tag_tree_related_sibbling_concept(stem)), %format(' '), %concept_eval(V, tag_concept(sub)), %concept_eval(V, tag_related_concept(sub)), %concept_eval(V, tag_tree_concept(sub)), %concept_eval(V, tag_tree_related_concept(sub)), format('~n'), fail ; true ). topN_eval_table(Type, N) :- scheme_alias(Type, Scheme), ( baseline_video(V), rdf(V, dc:id, literal(Id)), video_concepts(V, Scheme, Concepts), video_tags(V, Tags), remove_stop_words(Tags, dutch, Tags1), length(Concepts, ConceptCount), length(Tags, TagCount), format('~w,\t~w,\t~w, ', [Id, TagCount, ConceptCount]), topN_concept_eval(V, Tags1, Scheme, N, tag_concept(exact)), topN_concept_eval(V, Tags1, Scheme, N, tag_related_concept(exact)), topN_concept_eval(V, Tags1, Scheme, N, tag_tree_concept(exact)), topN_concept_eval(V, Tags1, Scheme, N, tag_tree_related_concept(exact)), topN_concept_eval(V, Tags1, Scheme, N, tag_tree_and_related_concept(exact)), topN_concept_eval(V, Tags1, Scheme, N, tag_tree_related_sibbling_concept(exact)), format(' '), topN_concept_eval(V, Tags1, Scheme, N, tag_concept(stem)), topN_concept_eval(V, Tags1, Scheme, N, tag_related_concept(stem)), topN_concept_eval(V, Tags1, Scheme, N, tag_tree_concept(stem)), topN_concept_eval(V, Tags1, Scheme, N, tag_tree_and_related_concept(stem)), topN_concept_eval(V, Tags1, Scheme, N, tag_tree_related_concept(stem)), topN_concept_eval(V, Tags1, Scheme, N, tag_tree_related_sibbling_concept(stem)), format('~n'), fail ; true ). video_tags(Video, Tags) :- findall(1-T, video_tag(Video, literal(T)), Tags). video_concepts(Video, Scheme, Concepts) :- findall(C, (rdf(Video, dc:subject, C), rdf(C, skos:inScheme, Scheme) ), Concepts0), sort(Concepts0, Concepts). scheme_alias(all, Scheme) :- rdf_equal(Scheme, gtaa:'GTAA'). scheme_alias(persons, Scheme) :- rdf_equal(Scheme, gtaa:'Peroonsnamen'). scheme_alias(places, Scheme) :- rdf_equal(Scheme, gtaa:'GeografischeNamen'). scheme_alias(subjects, Scheme) :- rdf_equal(Scheme, gtaa:'OnderwerpenBenG'). scheme_alias(names, Scheme) :- rdf_equal(Scheme, gtaa:'Namen'). concept_eval(V, Tags, Scheme, Goal) :- derived_concepts(Tags, Goal, Scheme, Derived), baseline_eval(Derived,V,Scheme,Eval), length(Derived,DerivedCount), length(Eval,EvalCount), %format('~w, ', [EvalCount]). format('~w,~w,', [EvalCount,DerivedCount]). topN_concept_eval(V, Tags, Scheme, N, Goal) :- derived_ranked_concepts(Tags, Goal, Scheme, Ranked), length(Ranked, Count), ( Count < N -> Top = Ranked ; length(Top, N), append(Top, _, Ranked) ), pairs_values(Ranked, Ranked1), pairs_values(Top, Top1), baseline_eval(Ranked1, V, Scheme, Eval), baseline_eval(Top1, V, Scheme, InTopN), length(Eval, EvalN), length(InTopN, TopN), format('~w, ~w, ', [EvalN, TopN]). %extend_gtaa_hierarchy :- gtaa_wn_hierarchy(GTAA, GTAA_Parent) :- rdf(GTAA, skos:exactMatch, WN), rdf_reachable(WN, skos:broader, WN_Parent), WN_Parent \== WN, rdf(GTAA_Parent, skos:exactMatch, WN_Parent). %rdf_assert(GTAA, skos:broader, GTAA_Parent, gtaa_wordnet_broader). list_concepts(Id, Type, Goal) :- scheme_alias(Type, Scheme), rdf(V, dc:id, literal(Id)), video_concepts(V, Scheme, Concepts), format('prof. annotations~n'), ( member(C, Concepts), display_label(C, L), format('~w~n', [L]), fail ; true ), format('~n derived concepts~n'), video_tags(V, Tags), remove_stop_words(Tags, dutch, Tags1), derived_concepts(Tags1, Goal, Scheme, Derived), ( member(C, Derived), display_label(C, L), format('~w ~n', [L]), fail ; true ), format('~n User tags~n'), ( member(_-T, Tags1), format('~w ~n', [T]), fail ; true ). /******************************* * C * *******************************/ :- use_module(library(real)). concept_rank(Video, Ranked) :- Goal = tag_concept(stem), %rdf_equal(Scheme, 'http://purl.org/vocabularies/cornetto'), rdf_equal(Scheme, gtaa:'OnderwerpenBenG'), video_concept_graph(Video, Scheme, Goal, Graph, Weights), graph_rename(Graph, Assoc, Vector), %show_graph(Vector), page_rank(Vector, Weights, Scores0), % why do I have length(Vector)+1 scores??? Scores0 = [_|Scores], length(Scores, ScoreCount), debug(c_graph, 'scores ~w', [ScoreCount]), assoc_to_list(Assoc, Pairs0), transpose_pairs(Pairs0, Pairs1), keysort(Pairs1, Pairs), pairs_values(Pairs, Concepts), length(Concepts, ConceptCount), debug(c_graph, 'concepts ~w', [ConceptCount]), pairs_keys_values(Ranked0, Scores, Concepts), keysort(Ranked0, Ranked1), reverse(Ranked1, Ranked). concept_clusters(Video, Clusters) :- Goal = tag_concept(exact), rdf_equal(Scheme, 'http://purl.org/vocabularies/cornetto'), %Goal = tag_concept(stem), %rdf_equal(Scheme, gtaa:'OnderwerpenBenG'), video_concept_graph(Video, Scheme, Goal, Graph, _Weights), graph_rename(Graph, Assoc, Vector), graph_cluster(Vector, Membership0), Membership0 = [_|Membership], length(Membership, ClusterCount), debug(c_graph, 'clusters ~w', ClusterCount), assoc_to_list(Assoc, Pairs0), transpose_pairs(Pairs0, Pairs1), keysort(Pairs1, Pairs), pairs_values(Pairs, Concepts), length(Concepts, ConceptCount), debug(c_graph, 'concepts ~w', [ConceptCount]), pairs_keys_values(Clusters0, Membership, Concepts), keysort(Clusters0, Clusters). video_concept_graph(Video, Scheme, Goal, Graph, Weights) :- tag_rank(Video, RankedTagList), length(RankedTagList, Tag_Count), debug(c_graph, '~w tags', [Tag_Count]), interpretation_graph(RankedTagList, Goal, Scheme, IGraph, Weights0), concept_graph(IGraph, CGraph, Weights1), append(IGraph, CGraph, Graph), append(Weights0, Weights1, Weights), length(Graph, Edge_Count), debug(c_graph, '~w edges', [Edge_Count]). %concept_graph(_, [], []). concept_graph(IGraph, CGraph, Weights) :- findall(C, member(i(_T,C), IGraph), Cs0), sort(Cs0, Cs), length(Cs, Concept_Count), debug(c_graph, '~w interpretations', [Concept_Count]), findall(i(C,C1)-Score, (member(C, Cs), expand_graph(C,C1,Score), C \== C1 ), Pairs), pairs_keys_values(Pairs, CGraph, Weights). interpretation_graph(Tags, Goal, Scheme, IGraph, Weights) :- findall(i(Tag,C)-Score, (member(Score-Tag, Tags), call(Goal, Tag, C), once(rdfs_individual_of(C, skos:'Concept')) %once(rdf(C, skos:inScheme, Scheme)) ), Pairs0), sort(Pairs0, Pairs), pairs_keys_values(Pairs, IGraph, Weights). graph_cluster(Vector, Clusters) :- %r_open, <- library(igraph), v <- Vector, g <- graph(v), Clusters <- 'clusters(g)$membership'. %r_close. page_rank(Vector, Weights, Rank) :- %r_open, <- library(igraph), v <- Vector, w <- Weights, g <- graph(v), %r_print( g ), Rank <- 'page.rank(g, directed = FALSE, weights = w)$vector'. %r_in( 'page.rank'(g, vids = 'V'(g), directed = 'TRUE', damping = 0.2, % weights = 'NULL', options = 'igraph.arpack.default') ), %r_close. show_graph(Vector) :- %r_open, <- library(igraph), v <- Vector, g <- graph(v), <- print(g), <- tkplot( g ), write( 'Press Return to continue...' ), nl, read_line_to_codes( user_input, _ ), <- print( 'dev.off()' ). %r_close. expand_graph(C, C1, 0.1) :- rdf(C, skos:related, C1). expand_graph(C, C1, 0.1) :- rdf_reachable(C, skos:broader, C1, 2, _). %expand_graph(C1, C, 0.1) :- % rdf_reachable(C1, skos:broader, C). %expand_graph(C, D, 0.1) :- % rdf(C, cornetto:domain, D). %% graph_rename(+Graph, -Assoc, -NewGraph) % % Rename nodes so that they start counted by 1. graph_rename(Graph, Assoc, NewGraph) :- empty_assoc(Assoc0), rename_vertices(Graph, Assoc0, 1, NewGraph, Assoc). rename_vertices([], Assoc, _, [], Assoc). rename_vertices([i(C1,C2)|T], Assoc0, N, [NewC1,NewC2|Rest], Assoc) :- rename_vertex(C1, Assoc0, N, Assoc1, N1, NewC1), rename_vertex(C2, Assoc1, N1, Assoc2, N2, NewC2), rename_vertices(T, Assoc2, N2, Rest, Assoc). rename_vertex(C, Assoc, N, Assoc, N, New) :- get_assoc(C, Assoc, New), !. rename_vertex(C, Assoc, N, Assoc1, N1, N) :- N1 is N+1, put_assoc(C, Assoc, N, Assoc1). v_count(Graph, Count) :- findall(V, (member(i(V,_), Graph) ;member(i(_,V), Graph) ), Vs0), sort(Vs0, Vs), length(Vs, Count). co_graph(V, Pairs) :- findall(T, video_tag(V, literal(T)), Tags0), sort(Tags0, Tags1), remove_stop_words(Tags1, dutch, Tags), co_tag_matrix(Tags, Tags, M), append(M, Pairs0), keysort(Pairs0, Pairs1), reverse(Pairs1, Pairs). %pairs_keys_values(Pairs, Graph, Weights), co_tag_matrix([], _, []). co_tag_matrix([Tag|T], Tags, [Row|Rs]) :- tag_matrix(Tags, Tag, Row), co_tag_matrix(T, Tags, Rs). tag_matrix([], _, []). tag_matrix([Tag1|Ts], Tag, [S-i(Tag, Tag1)|Rs]) :- cooccur(Tag, Tag1, S), tag_matrix(Ts, Tag, Rs). tag_matrix([_|Ts], Tag, Rs) :- tag_matrix(Ts, Tag, Rs). cooccur(Tag1,Tag2,Score) :- findall(V1, ( video_tag(V1, literal(Tag1)) ), Ds0), sort(Ds0, Ds), length(Ds, D), D > 1, findall(V, ( member(V, Ds), video_tag(V, literal(Tag2)) ), Us0), sort(Us0, Us), length(Us, U), U > 1, Score is U / D. m_video(V) :- baseline_video(V), %rdf(V, rdf:type, pprime:'Video'), multiple_players(V), min_tags(V). min_tags(V) :- findall(P,(rdf(V,pprime:hasAnnotation,A),rdf(A,rdf:value,P)), Ps0), sort(Ps0, Ps), length(Ps,N), N > 0. multiple_players(V) :- findall(P,(rdf(V,pprime:hasAnnotation,A),rdf(A,pprime:creator,P)), Ps0), sort(Ps0, Ps), length(Ps,N), N > 1. mbh_topN_eval_table(V) :- ( m_video(V), rdf(V, dc:id, literal(Id)), video_mbh_terms(V, Terms), tag_rank(V, Tags), length(Terms, TermCount), length(Tags, TagCount), ( TermCount is 0 -> format('~w', [Id]) ; TermCount2 is TermCount*2, pairs_values(Tags, Tags1), topN(Tags1, TermCount, Terms, TopN), topN(Tags1, TermCount2, Terms, Top2N), topN(Tags1, TagCount, Terms, All), format('~w,\t~w,\t~w,\t~2f,\t~2f,\t~2f~n', [Id, TagCount, TermCount, TopN, Top2N, All]) ), fail ; true ). topN(Tags, N, Terms, TopN) :- length(Terms, Term_Count), length(Tags, Tag_Count), ( Tag_Count > N -> length(Top_N_Tags, N), append(Top_N_Tags, _, Tags) ; Top_N_Tags = Tags ), maplist(downcase_atom, Top_N_Tags, Top_N_Tags1), sort(Top_N_Tags1, Top_N_Tags2), ord_intersect(Top_N_Tags2, Terms, Intersect), length(Intersect, Intersect_Count), TopN is Intersect_Count/Term_Count. video_mbh_terms(V, Terms) :- findall(C, (rdf(V, dc:subject, literal(C0), mbh_tag), downcase_atom(C0,C1), term_rm_dot(C1,C)), Terms0), sort(Terms0, Terms). term_rm_dot(T0,T) :- sub_atom(T0,_,1,0,A), ( A = '.' -> sub_atom(T0,0,_,1,T) ; T = T0 ). /******************************* * mbh to GTAA * *******************************/ mbh_to_gtaa(Pairs) :- video_mbh_terms(_, Terms), length(Terms, Term_Count), debug(mbh_gtaa, 'terms: ~w', [Term_Count]), gtaa_candidates(Terms, Pairs). gtaa_candidates([], []). gtaa_candidates([Term|Ts], [Term-Rs|Rest]) :- findall(C, (rdf_has(C, rdfs:label, literal(exact(Term),_)), once(rdfs_individual_of(C, skos:'Concept')) ), Rs0), sort(Rs0, Rs), length(Rs, Concept_Count), debug(mbh_gtaa, '~w :: ~w concepts', [Term, Concept_Count]), gtaa_candidates(Ts, Rest). /******************************* * C * *******************************/