vumix/commit

split up process into separate steps

authorMichiel Hildebrand
Tue Jun 5 17:58:20 2012 +0200
committerMichiel Hildebrand
Tue Jun 5 17:58:20 2012 +0200
commit970607f9bdb79b36009029eddd8153a284b7b2e9
tree82275ed290a9f3e95e20e5ce1d9c92cc91b05918
parenta56becc44b7a7f10f772d47e5aab9b976e21490c
Diff style: patch stat
diff --git a/lib/tag_interpret.pl b/lib/tag_interpret.pl
new file mode 100644
index 0000000..05fed66
--- /dev/null
+++ b/lib/tag_interpret.pl
@@ -0,0 +1,184 @@
+:- module(tag_interpret,
+	  [interpret_tags/2
+	  ]).
+
+:- use_module(library(semweb/rdf_db)).
+:- use_module(library(semweb/rdfs)).
+:- use_module(library(real)).
+:- use_module(library(tfidf)).
+:- use_module(api(reconcile)).
+
+
+i_test(Video, Pairs) :-
+	tag_rank(Video, RankedTags),
+	filter_tags(RankedTags, Tags),
+	interpret_tags(Tags, Pairs).
+
+filter_tags([], []).
+filter_tags([Score-Tag|Ps], [Tag|Ts]) :-
+	Score > 0.01,
+	!,
+	filter_tags(Ps, Ts).
+filter_tags([_|Ps], Ts) :-
+	filter_tags(Ps, Ts).
+
+
+%%	interpret_tags(+Tags, -Pairs)
+%
+%	Pairs is a list of tag-concepts pairs. Where concepts is itself
+%	a list of ranked candidate score-concepts.
+
+interpret_tags(Tags, Tag_Interpretations) :-
+	tag_interpretations(Tags, Pairs),
+	pairs_values(Pairs, Concepts),
+	group_pairs_by_key(Pairs, TagConcepts),
+	concept_graph(Concepts, Graph),
+	page_rank(Graph, ConceptScores),
+	list_to_assoc(ConceptScores, ScoreAssoc),
+	tag_concept_scores(TagConcepts, ScoreAssoc, Tag_Interpretations).
+
+tag_interpretations(Tags, Pairs) :-
+	rdf_equal(skos:'Concept', Type),
+	findall(Tag-C,
+		(member(Tag,Tags),
+		 reconcile(Tag, 10, Type, [], Hits),
+		 member(hit(D,C,_,_), Hits),
+		 D < 10
+		),
+		Pairs).
+
+tag_concept_scores([], _, []).
+tag_concept_scores([Tag-Concepts|T], Assoc, [Tag-Ranked|Rest]) :-
+	concept_scores(Concepts, Assoc, ConceptScores),
+	keysort(ConceptScores, Ranked0),
+	reverse(Ranked0, Ranked),
+	tag_concept_scores(T, Assoc, Rest).
+
+concept_scores([], _, []).
+concept_scores([C|Cs], Assoc, [Score-C|Rs]) :-
+	(   get_assoc(C, Assoc, V)
+	->  Score = V
+	;   Score = 0
+	),
+	concept_scores(Cs, Assoc, Rs).
+
+
+		 /*******************************
+		 *	  concept graph		*
+		 *******************************/
+
+%%	concept_graph(+List_Of_Concepts, -Graph)
+%
+%	Graph is a list of weighted links between concepts,
+%	i(C1,C2,Distance).
+
+concept_graph(Concepts, Graph) :-
+	 cartesian(Concepts, Concepts, Cartesian),
+	 pair_distances(Cartesian, Graph).
+
+
+pair_distances([], []).
+pair_distances([[A,A]|T], Graph) :-
+	!,
+	pair_distances(T, Graph).
+pair_distances([[A,B]|T], [Rel|Graph]) :-
+	Rel = i(A,B,Distance),
+	semantic_distance(A,B,Distance),
+	!,
+	pair_distances(T, Graph).
+pair_distances([_|T], Graph) :-
+	pair_distances(T, Graph).
+
+semantic_distance(A, B, _) :-
+	rdf_has(A, rdfs:label, L),
+	rdf_has(B, rdfs:label, L),
+	!,
+	fail. % do not add distance relations between diff senses of the same interpretation.
+semantic_distance(A, B, D) :-
+	rdf_reachable(B, skos:broader, A, 3, N),
+	!,
+	D is 0.05^N.
+semantic_distance(A, B, D) :-
+	rdf_reachable(A, skos:broader, B, 2, N),
+	!,
+	D is 0.001^N.
+semantic_distance(A, B, D) :-
+	rdf_reachable(A, skos:related, B, 2, N),
+	!,
+	D is 0.01^N.
+semantic_distance(A, B, D) :-
+	rdf(A, P, B),
+	\+ rdfs_subproperty_of(P, skos:broader),
+	\+ rdfs_subproperty_of(P, skos:narrower),
+	\+ rdfs_subproperty_of(P, skos:related),
+	!,
+	D is 0.01.
+
+
+		 /*******************************
+		 *	     page rank		*
+		 *******************************/
+
+%%	page_rank(+Graph, -Ranked)
+%
+%	Ranked is a list of score-node pairs, created by computing the
+%	page rank of Graph.
+
+page_rank(Graph, NodeScores) :-
+	weighted_graph_rename(Graph, Assoc, Vector, Weights),
+	r_page_rank(Vector, Weights, Scores0),
+	Scores0 = [_EigenVector|Scores],
+	assoc_to_list(Assoc, Pairs0),
+	transpose_pairs(Pairs0, Pairs1),
+	keysort(Pairs1, Pairs),
+	pairs_values(Pairs, Nodes),
+	pairs_keys_values(NodeScores, Nodes, Scores).
+
+r_page_rank(Vector, Weights, Rank) :-
+	<- library(igraph),
+	v <- Vector,
+	w <- Weights,
+	g <- graph(v),
+	Rank <- 'page.rank(g, weights = w)$vector'.
+
+
+%%	weighted_graph_rename(+Graph, -Assoc, -NewGraph, -Weights)
+%
+%	Rename nodes so that they start counted by 1.
+
+weighted_graph_rename(Graph, Assoc, NewGraph, Weights) :-
+	empty_assoc(Assoc0),
+	rename_vertices(Graph, Assoc0, 1, NewGraph, Assoc, Weights).
+
+rename_vertices([], Assoc, _, [], Assoc, []).
+rename_vertices([i(C1,C2,W)|T], Assoc0, N, [NewC1,NewC2|Rest], Assoc, [W|Weights]) :-
+	rename_vertex(C1, Assoc0, N, Assoc1, N1, NewC1),
+	rename_vertex(C2, Assoc1, N1, Assoc2, N2, NewC2),
+	rename_vertices(T, Assoc2, N2, Rest, Assoc, Weights).
+
+rename_vertex(C, Assoc, N, Assoc, N, New) :-
+	get_assoc(C, Assoc, New),
+	!.
+rename_vertex(C, Assoc, N, Assoc1, N1, N) :-
+	N1 is N+1,
+	put_assoc(C, Assoc, N, Assoc1).
+
+
+
+		 /*******************************
+		 *	       misc		*
+		 *******************************/
+
+%%	cartesian(+L1, +L2, -L3)
+%
+%	L3 is the cartesian product of L1 and L2.
+
+cartesian([], _L, []).
+cartesian([A|N], L, M) :-
+	pair(A,L,M1),
+	cartesian(N, L, M2),
+	append(M1, M2, M).
+
+pair(_A, [], []) :- !.
+pair(A, [B|L], [[A,B]|N] ) :-
+	pair(A, L, N).