vumix/commit
split up process into separate steps
author | Michiel Hildebrand |
---|---|
Tue Jun 5 17:58:20 2012 +0200 | |
committer | Michiel Hildebrand |
Tue Jun 5 17:58:20 2012 +0200 | |
commit | 970607f9bdb79b36009029eddd8153a284b7b2e9 |
tree | 82275ed290a9f3e95e20e5ce1d9c92cc91b05918 |
parent | a56becc44b7a7f10f772d47e5aab9b976e21490c |
Diff style: patch stat
diff --git a/lib/tag_interpret.pl b/lib/tag_interpret.pl new file mode 100644 index 0000000..05fed66 --- /dev/null +++ b/lib/tag_interpret.pl @@ -0,0 +1,184 @@ +:- module(tag_interpret, + [interpret_tags/2 + ]). + +:- use_module(library(semweb/rdf_db)). +:- use_module(library(semweb/rdfs)). +:- use_module(library(real)). +:- use_module(library(tfidf)). +:- use_module(api(reconcile)). + + +i_test(Video, Pairs) :- + tag_rank(Video, RankedTags), + filter_tags(RankedTags, Tags), + interpret_tags(Tags, Pairs). + +filter_tags([], []). +filter_tags([Score-Tag|Ps], [Tag|Ts]) :- + Score > 0.01, + !, + filter_tags(Ps, Ts). +filter_tags([_|Ps], Ts) :- + filter_tags(Ps, Ts). + + +%% interpret_tags(+Tags, -Pairs) +% +% Pairs is a list of tag-concepts pairs. Where concepts is itself +% a list of ranked candidate score-concepts. + +interpret_tags(Tags, Tag_Interpretations) :- + tag_interpretations(Tags, Pairs), + pairs_values(Pairs, Concepts), + group_pairs_by_key(Pairs, TagConcepts), + concept_graph(Concepts, Graph), + page_rank(Graph, ConceptScores), + list_to_assoc(ConceptScores, ScoreAssoc), + tag_concept_scores(TagConcepts, ScoreAssoc, Tag_Interpretations). + +tag_interpretations(Tags, Pairs) :- + rdf_equal(skos:'Concept', Type), + findall(Tag-C, + (member(Tag,Tags), + reconcile(Tag, 10, Type, [], Hits), + member(hit(D,C,_,_), Hits), + D < 10 + ), + Pairs). + +tag_concept_scores([], _, []). +tag_concept_scores([Tag-Concepts|T], Assoc, [Tag-Ranked|Rest]) :- + concept_scores(Concepts, Assoc, ConceptScores), + keysort(ConceptScores, Ranked0), + reverse(Ranked0, Ranked), + tag_concept_scores(T, Assoc, Rest). + +concept_scores([], _, []). +concept_scores([C|Cs], Assoc, [Score-C|Rs]) :- + ( get_assoc(C, Assoc, V) + -> Score = V + ; Score = 0 + ), + concept_scores(Cs, Assoc, Rs). + + + /******************************* + * concept graph * + *******************************/ + +%% concept_graph(+List_Of_Concepts, -Graph) +% +% Graph is a list of weighted links between concepts, +% i(C1,C2,Distance). + +concept_graph(Concepts, Graph) :- + cartesian(Concepts, Concepts, Cartesian), + pair_distances(Cartesian, Graph). + + +pair_distances([], []). +pair_distances([[A,A]|T], Graph) :- + !, + pair_distances(T, Graph). +pair_distances([[A,B]|T], [Rel|Graph]) :- + Rel = i(A,B,Distance), + semantic_distance(A,B,Distance), + !, + pair_distances(T, Graph). +pair_distances([_|T], Graph) :- + pair_distances(T, Graph). + +semantic_distance(A, B, _) :- + rdf_has(A, rdfs:label, L), + rdf_has(B, rdfs:label, L), + !, + fail. % do not add distance relations between diff senses of the same interpretation. +semantic_distance(A, B, D) :- + rdf_reachable(B, skos:broader, A, 3, N), + !, + D is 0.05^N. +semantic_distance(A, B, D) :- + rdf_reachable(A, skos:broader, B, 2, N), + !, + D is 0.001^N. +semantic_distance(A, B, D) :- + rdf_reachable(A, skos:related, B, 2, N), + !, + D is 0.01^N. +semantic_distance(A, B, D) :- + rdf(A, P, B), + \+ rdfs_subproperty_of(P, skos:broader), + \+ rdfs_subproperty_of(P, skos:narrower), + \+ rdfs_subproperty_of(P, skos:related), + !, + D is 0.01. + + + /******************************* + * page rank * + *******************************/ + +%% page_rank(+Graph, -Ranked) +% +% Ranked is a list of score-node pairs, created by computing the +% page rank of Graph. + +page_rank(Graph, NodeScores) :- + weighted_graph_rename(Graph, Assoc, Vector, Weights), + r_page_rank(Vector, Weights, Scores0), + Scores0 = [_EigenVector|Scores], + assoc_to_list(Assoc, Pairs0), + transpose_pairs(Pairs0, Pairs1), + keysort(Pairs1, Pairs), + pairs_values(Pairs, Nodes), + pairs_keys_values(NodeScores, Nodes, Scores). + +r_page_rank(Vector, Weights, Rank) :- + <- library(igraph), + v <- Vector, + w <- Weights, + g <- graph(v), + Rank <- 'page.rank(g, weights = w)$vector'. + + +%% weighted_graph_rename(+Graph, -Assoc, -NewGraph, -Weights) +% +% Rename nodes so that they start counted by 1. + +weighted_graph_rename(Graph, Assoc, NewGraph, Weights) :- + empty_assoc(Assoc0), + rename_vertices(Graph, Assoc0, 1, NewGraph, Assoc, Weights). + +rename_vertices([], Assoc, _, [], Assoc, []). +rename_vertices([i(C1,C2,W)|T], Assoc0, N, [NewC1,NewC2|Rest], Assoc, [W|Weights]) :- + rename_vertex(C1, Assoc0, N, Assoc1, N1, NewC1), + rename_vertex(C2, Assoc1, N1, Assoc2, N2, NewC2), + rename_vertices(T, Assoc2, N2, Rest, Assoc, Weights). + +rename_vertex(C, Assoc, N, Assoc, N, New) :- + get_assoc(C, Assoc, New), + !. +rename_vertex(C, Assoc, N, Assoc1, N1, N) :- + N1 is N+1, + put_assoc(C, Assoc, N, Assoc1). + + + + /******************************* + * misc * + *******************************/ + +%% cartesian(+L1, +L2, -L3) +% +% L3 is the cartesian product of L1 and L2. + +cartesian([], _L, []). +cartesian([A|N], L, M) :- + pair(A,L,M1), + cartesian(N, L, M2), + append(M1, M2, M). + +pair(_A, [], []) :- !. +pair(A, [B|L], [[A,B]|N] ) :- + pair(A, L, N).