vumix/commit
page rank on semantic distance graph
author | Michiel Hildebrand |
---|---|
Wed May 30 22:29:01 2012 +0200 | |
committer | Michiel Hildebrand |
Wed May 30 22:29:01 2012 +0200 | |
commit | dc3b8287253e2ddebd7d7ba8c567585fcb30a5cc |
tree | e8b0300d251379677c077c1ce41c29aa84c66c6a |
parent | bfe8583c08e18de5d6516f59ab572b8a75a6531b |
Diff style: patch stat
diff --git a/applications/vumix_p0.pl b/applications/vumix_p0.pl index 80331eb..8f3e0c9 100644 --- a/applications/vumix_p0.pl +++ b/applications/vumix_p0.pl @@ -24,13 +24,14 @@ :- use_module(api(annotation)). :- use_module(library(user_process)). -:- use_module(api(reconcile)). -:- use_module(library(video_annotation)). +%:- use_module(api(reconcile)). +%:- use_module(library(video_annotation)). :- use_module(library(yaz_util)). -:- use_module(library(stop_words)). +%:- use_module(library(stop_words)). :- use_module(library(tfidf)). -:- use_module(library(tag_concept)). - +%:- use_module(library(tag_concept)). +:- use_module(library(semrank)). +%:- use_module(library(real)). /*************************************************** * http handlers @@ -127,13 +128,16 @@ http_vumix_p0(Request) :- ; logged_on(User, anonymous) ), user_process(User, Target, _), - tag_rank(Target, Tags0), - remove_stop_words(Tags0, dutch, Tags), + %tfidf_rank(Target, Concepts), + c_semantic_distance_rank(Target, Concepts), + html_page(Target, Fields, Concepts). + +tfidf_rank(Target, Concepts) :- + tag_rank(Target, Tags), link_tags_to_concepts(Tags, Concepts0), keysort(Concepts0, Concepts1), group_pairs_by_key(Concepts1, Concepts2), - pairs_sort_by_value_sum(Concepts2, Concepts), - html_page(Target, Fields, Concepts). + pairs_sort_by_value_sum(Concepts2, Concepts). %% link_tags_to_concepts(+Tags, -Concepts) % @@ -141,16 +145,16 @@ http_vumix_p0(Request) :- link_tags_to_concepts([], []). link_tags_to_concepts([Score-Tag|As], [Concept-Score|Rest]) :- - Score > 7, + %Score > 7, rdf_has(Concept,rdfs:label,literal(exact(Tag),_)), %reconcile(Tag, 3, _Type, [], Hits), %member(hit(D,Concept,_,_), Hits), %D < 3, %\+ rdf(Concept,skos:inScheme,gtaa:'OnderwerpenBenG'), % hack, to remove duplication between onderwerpenBenG en onderwerpen - ( rdf(Concept,skos:inScheme,gtaa:'OnderwerpenBenG') - -> true - ; rdf(Concept,skos:scopeNote,_) - ), %hack. Concepts without a scopenote are dubious + %( rdf(Concept,skos:inScheme,gtaa:'OnderwerpenBenG') + %-> true + %; rdf(Concept,skos:scopeNote,_) + %), %hack. Concepts without a scopenote are dubious !, link_tags_to_concepts(As, Rest). link_tags_to_concepts([_|As], Rest) :- diff --git a/lib/semrank.pl b/lib/semrank.pl new file mode 100644 index 0000000..0e2cc6b --- /dev/null +++ b/lib/semrank.pl @@ -0,0 +1,151 @@ +:- module(semrank, + [r_semantic_distance_rank/2, + c_semantic_distance_rank/2, + semantic_distance_rank/2 + ]). + +:- use_module(library(semweb/rdf_db)). +:- use_module(library(semweb/rdfs)). +:- use_module(library(real)). +:- use_module(library(tfidf)). +:- use_module(api(reconcile)). + +:- dynamic + rank_cache/2. + +c_semantic_distance_rank(Video, Ranked) :- + rank_cache(Video, Ranked). +c_semantic_distance_rank(Video, Ranked) :- + semantic_distance_rank(Video, Ranked), + assert(rank_cache(Video, Ranked)). + +r_semantic_distance_rank(Video, Ranked) :- + retractall(rank_cache(Video,_)), + semantic_distance_rank(Video, Ranked), + assert(rank_cache(Video, Ranked)). + +semantic_distance_rank(Video, Ranked) :- + tag_rank(Video, Tags), + tag_interpretations(Tags, I_Graph, Concepts), + length(Concepts, Concept_Count), + debug(semrank, 'concepts: ~w', [Concept_Count]), + semantic_distance_graph(Concepts, C_Graph), + append(I_Graph, C_Graph, Graph), + length(Graph, Edge_Count), + debug(semrank, 'distance graph size: ~w', [Edge_Count]), + weighted_graph_rename(Graph, Assoc, Vector, Weights), + length(Vector, V_Count), + debug(semrank, 'vector: ~w', [V_Count]), + page_rank(Vector, Weights, Scores0), + Scores0 = [_EigenVector|Scores], + assoc_to_list(Assoc, Pairs0), + transpose_pairs(Pairs0, Pairs1), + keysort(Pairs1, Pairs), + pairs_values(Pairs, Resources), + pairs_keys_values(Ranked0, Scores, Resources), + keysort(Ranked0, Ranked1), + reverse(Ranked1, Ranked). + +tag_interpretations(Tags, Edges, Concepts) :- + rdf_equal(skos:'Concept', Type), + findall(i(Tag,C,W), + (member(W-Tag,Tags), + W > 0.01, + reconcile(Tag, 10, Type, [], Hits), + member(hit(D,C,_,_), Hits), + D < 1.5 + ), + Edges), + findall(C, member(i(_,C,_), Edges), Cs), + sort(Cs, Concepts). + +page_rank(Vector, Weights, Rank) :- + <- library(igraph), + v <- Vector, + w <- Weights, + g <- graph(v), + Rank <- 'page.rank(g, weights = w)$vector'. + + +semantic_distance_graph(Concepts, Graph) :- + cartesian(Concepts, Concepts, Cartesian), + pair_distances(Cartesian, Graph). + + +pair_distances([], []). +pair_distances([[A,A]|T], Graph) :- + !, + pair_distances(T, Graph). +pair_distances([[A,B]|T], [Rel|Graph]) :- + Rel = i(A,B,Distance), + semantic_distance(A,B,Distance), + !, + pair_distances(T, Graph). +pair_distances([_|T], Graph) :- + pair_distances(T, Graph). + +semantic_distance(A, B, _) :- + rdf_has(A, rdfs:label, L), + rdf_has(B, rdfs:label, L), + !, + fail. % do not add distance relations between diff senses of the same interpretation. +semantic_distance(A, B, D) :- + rdf_reachable(B, skos:broader, A, 3, N), + !, + D is 0.5^N. +semantic_distance(A, B, D) :- + rdf_reachable(A, skos:broader, B, 2, N), + !, + D is 0.001^N. +semantic_distance(A, B, D) :- + rdf_reachable(A, skos:related, B, 2, N), + !, + D is 0.01^N. +/* +semantic_distance(A, B, D) :- + rdf(A, _, C), + ( B == C + -> D = 0.5 + ; rdf_reachable(B, skos:broader, C, 3, _) + -> D = 0.25 + ; rdf_reachable(C, skos:broader, B, 3, _) + -> D = 0.5 + ). +*/ + +%% semantic_distance(+L1, +L2, -Cartesian) +% +% Cartesian is the cartesian product of L1 and L2. + +cartesian([], _L, []). +cartesian([A|N], L, M) :- + pair(A,L,M1), + cartesian(N, L, M2), + append(M1, M2, M). + +pair(_A, [], []) :- !. +pair(A, [B|L], [[A,B]|N] ) :- + pair(A, L, N). + + + +%% weighted_graph_rename(+Graph, -Assoc, -NewGraph, -Weights) +% +% Rename nodes so that they start counted by 1. + +weighted_graph_rename(Graph, Assoc, NewGraph, Weights) :- + empty_assoc(Assoc0), + rename_vertices(Graph, Assoc0, 1, NewGraph, Assoc, Weights). + +rename_vertices([], Assoc, _, [], Assoc, []). +rename_vertices([i(C1,C2,W)|T], Assoc0, N, [NewC1,NewC2|Rest], Assoc, [W|Weights]) :- + rename_vertex(C1, Assoc0, N, Assoc1, N1, NewC1), + rename_vertex(C2, Assoc1, N1, Assoc2, N2, NewC2), + rename_vertices(T, Assoc2, N2, Rest, Assoc, Weights). + +rename_vertex(C, Assoc, N, Assoc, N, New) :- + get_assoc(C, Assoc, New), + !. +rename_vertex(C, Assoc, N, Assoc1, N1, N) :- + N1 is N+1, + put_assoc(C, Assoc, N, Assoc1). diff --git a/lib/tag_concept.pl b/lib/tag_concept.pl index f234417..0fe3b6d 100644 --- a/lib/tag_concept.pl +++ b/lib/tag_concept.pl @@ -413,7 +413,8 @@ concept_graph(IGraph, CGraph, Weights) :- interpretation_graph(Tags, Goal, Scheme, IGraph, Weights) :- findall(i(Tag,C)-Score, (member(Score-Tag, Tags), call(Goal, Tag, C), - once(rdf(C, skos:inScheme, Scheme)) + once(rdfs_individual_of(C, skos:'Concept')) + %once(rdf(C, skos:inScheme, Scheme)) ), Pairs0), sort(Pairs0, Pairs), @@ -434,7 +435,7 @@ page_rank(Vector, Weights, Rank) :- w <- Weights, g <- graph(v), %r_print( g ), - Rank <- 'page.rank(g, weights = w)$vector'. + Rank <- 'page.rank(g, directed = FALSE, weights = w)$vector'. %r_in( 'page.rank'(g, vids = 'V'(g), directed = 'TRUE', damping = 0.2, % weights = 'NULL', options = 'igraph.arpack.default') ), %r_close. @@ -610,3 +611,35 @@ term_rm_dot(T0,T) :- ; T = T0 ). + + + /******************************* + * mbh to GTAA * + *******************************/ + +mbh_to_gtaa(Pairs) :- + video_mbh_terms(_, Terms), + length(Terms, Term_Count), + debug(mbh_gtaa, 'terms: ~w', [Term_Count]), + gtaa_candidates(Terms, Pairs). + +gtaa_candidates([], []). +gtaa_candidates([Term|Ts], [Term-Rs|Rest]) :- + findall(C, (rdf_has(C, rdfs:label, literal(exact(Term),_)), + once(rdfs_individual_of(C, skos:'Concept')) + ), + Rs0), + sort(Rs0, Rs), + length(Rs, Concept_Count), + debug(mbh_gtaa, '~w :: ~w concepts', [Term, Concept_Count]), + gtaa_candidates(Ts, Rest). + + + + + /******************************* + * C * + *******************************/ + + + diff --git a/rdf/mbh_example.ttl b/rdf/mbh_example.ttl index 4c38a42..6c259db 100644 --- a/rdf/mbh_example.ttl +++ b/rdf/mbh_example.ttl @@ -40,7 +40,8 @@ pprime:subjectAnnotation a :annotationField ; rdfs:label "Onderwerp"@nl ; dc:comment "voeg subject termen toe"@nl ; - :scheme <http://data.beeldengeluid.nl/gtaa/OnderwerpenBenG> ; + :scheme <http://purl.org/vocabularies/cornetto> ; + #:scheme <http://data.beeldengeluid.nl/gtaa/OnderwerpenBenG> ; :source "/api/autocomplete?q={query}&filter={\"scheme\":\"http://data.beeldengeluid.nl/gtaa/OnderwerpenBenG\"}" . pprime:onderwerpAnnotation diff --git a/web/css/vumix.css b/web/css/vumix.css index 5d933a1..87771c6 100644 --- a/web/css/vumix.css +++ b/web/css/vumix.css @@ -3,7 +3,7 @@ height: 100%; } #fields .bd { - height: 560px; + height: 600px; } #fields .ft { border-top: 2px solid white;