vumix/commit
page rank experiments
author | Michiel Hildebrand |
---|---|
Mon May 21 10:11:47 2012 +0200 | |
committer | Michiel Hildebrand |
Mon May 21 10:11:47 2012 +0200 | |
commit | 8eac0c86b2e2c12d8d3588c19f3097ba06867cbc |
tree | 5a4b0841203aa7b645b6fa7c4b093cc9ce3a7dee |
parent | e07369e562569ecf237237082de7b0c17ab13cae |
Diff style: patch stat
diff --git a/lib/r_exp.pl b/lib/r_exp.pl index d0d9d0e..aa0fbaa 100644 --- a/lib/r_exp.pl +++ b/lib/r_exp.pl @@ -1,9 +1,10 @@ :- module(r_exp, []). -:- use_module(library(real)). +:- use_module(library('R')). :- use_module(library(tag_concept)). :- use_module(library(video_annotation)). +:- use_module(library(stop_words)). %v_compare(V1, V2) :- % video_gtaa_vector(V1, L1), @@ -35,9 +36,22 @@ list_to_vector(T1, [_|T2], [0|V]) :- list_to_vector(T1, T2, V). rtest :- - y <- rnorm(50), - <- y, - x <- rnorm(y), - <- x11(width=5,height=3.5), - <- plot(x,y), - devoff. + r_open, + y <- rnorm(50), + r_print( y ), + x <- rnorm(y), + r_in( x11(width=5,height=3.5) ), + r_in( plot(x,y) ), + write( 'Press Return to continue...' ), nl, + read_line_to_codes( user_input, _ ), + r_print( 'dev.off()' ), + r_close. + + +pr_test :- + r_open, + r_lib(igraph), + g <- 'random.graph.game'(20, 5/20, directed='TRUE'), + r_print( g ), + r_in( 'page.rank'(g) ), + r_close. diff --git a/lib/stop_words.pl b/lib/stop_words.pl index 130cda3..6a958e3 100644 --- a/lib/stop_words.pl +++ b/lib/stop_words.pl @@ -1,5 +1,21 @@ :- module(stop_words, - [stop_word/2]). + [remove_stop_words/3, + stop_word/2]). + + + +remove_stop_words([], _, []). +remove_stop_words([E|Tags], Lang, Filtered) :- + tag(E, Tag0), + downcase_atom(Tag0, Tag), + stop_word(Lang, Tag), + !, + remove_stop_words(Tags, Lang, Filtered). +remove_stop_words([Tag|Tags], Lang, [Tag|Filtered]) :- + remove_stop_words(Tags, Lang, Filtered). + +tag(_-Tag, Tag) :- !. +tag(Tag, Tag). :- multifile stop_word/2. diff --git a/lib/tag_concept.pl b/lib/tag_concept.pl index a840175..44362ba 100644 --- a/lib/tag_concept.pl +++ b/lib/tag_concept.pl @@ -2,8 +2,7 @@ [tag_concept/3, video_tags/2, video_concepts/3, - derived_concepts/4, - remove_stop_words/3 + derived_concepts/4 ]). @@ -11,10 +10,10 @@ :- use_module(library(semweb/rdfs)). :- use_module(library(semweb/rdf_label)). :- use_module(library(yaz_util)). -:- use_module(library(stop_words)). :- use_module(library(tfidf)). :- use_module(library(find_resource)). :- use_module(library(video_annotation)). +:- use_module(library(stop_words)). :- rdf_meta derived_concepts(+, :, r, -), @@ -59,6 +58,7 @@ tag_concept(Stem, Tag, Concept) :- -> Diff = 0.9 ), snowball(dutch, Tag, TagStem), + %sub_atom(TagStem, 0, _, 1, Prefix), rdf_has(Concept,rdfs:label, literal(prefix(TagStem), Lit)), rdf(Concept, skos:inScheme, gtaa:'GTAA'), literal_text(Lit, Label), @@ -67,7 +67,7 @@ tag_concept(Stem, Tag, Concept) :- ; snowball(dutch, Label, LabelStem), find_resource:literal_distance(TagStem, LabelStem, D), %isub(TagStem, Label, true, Sim), - D < 5 + D < 4 %Sim > Diff. ). @@ -332,11 +332,154 @@ list_concepts(Id, Type, Goal) :- ). -remove_stop_words([], _, []). -remove_stop_words([_Rank-Tag0|Tags], Lang, Filtered) :- - downcase_atom(Tag0, Tag), - stop_word(Lang, Tag), - !, - remove_stop_words(Tags, Lang, Filtered). -remove_stop_words([Tag|Tags], Lang, [Tag|Filtered]) :- - remove_stop_words(Tags, Lang, Filtered). + + /******************************* + * C * + *******************************/ + +:- use_module(library('R')). + + +concept_rank(Video, Ranked) :- + Goal = tag_concept(stem), + rdf_equal(Scheme, gtaa:'OnderwerpenBenG'), + video_concept_graph(Video, Scheme, Goal, Graph, Weights), + graph_rename(Graph, Assoc, Vector), + %show_graph(Vector), + page_rank(Vector, Weights, Scores0), % why do I have length(Vector)+1 scores??? + Scores0 = [_|Scores], + length(Scores, ScoreCount), + debug(c_graph, 'scores ~w', [ScoreCount]), + assoc_to_list(Assoc, Pairs0), + transpose_pairs(Pairs0, Pairs1), + keysort(Pairs1, Pairs), + pairs_values(Pairs, Concepts), + length(Concepts, ConceptCount), + debug(c_graph, 'concepts ~w', [ConceptCount]), + pairs_keys_values(Ranked0, Scores, Concepts), + keysort(Ranked0, Ranked1), + reverse(Ranked1, Ranked). + +concept_clusters(Video, Clusters) :- + Goal = tag_concept(stem), + rdf_equal(Scheme, gtaa:'OnderwerpenBenG'), + video_concept_graph(Video, Scheme, Goal, Graph, _Weights), + graph_rename(Graph, Assoc, Vector), + graph_cluster(Vector, Membership0), + Membership0 = [_|Membership], + length(Membership, ClusterCount), + debug(c_graph, 'clusters ~w', ClusterCount), + assoc_to_list(Assoc, Pairs0), + transpose_pairs(Pairs0, Pairs1), + keysort(Pairs1, Pairs), + pairs_values(Pairs, Concepts), + length(Concepts, ConceptCount), + debug(c_graph, 'concepts ~w', [ConceptCount]), + pairs_keys_values(Clusters0, Membership, Concepts), + keysort(Clusters0, Clusters). + +video_concept_graph(Video, Scheme, Goal, Graph, Weights) :- + tag_rank(Video, RankedTagList), + length(RankedTagList, Tag_Count), + debug(c_graph, '~w tags', [Tag_Count]), + interpretation_graph(RankedTagList, Goal, Scheme, IGraph, Weights0), + concept_graph(IGraph, CGraph, Weights1), + append(IGraph, CGraph, Graph), + append(Weights0, Weights1, Weights), + length(Graph, Edge_Count), + debug(c_graph, '~w edges', [Edge_Count]). + + +%concept_graph(_, [], []). +concept_graph(IGraph, CGraph, Weights) :- + findall(C, member(i(_T,C), IGraph), Cs0), + sort(Cs0, Cs), + length(Cs, Concept_Count), + debug(c_graph, '~w interpretations', [Concept_Count]), + findall(i(C,C1)-Score, (member(C, Cs), + expand_graph(C,C1,Score) + ), + Pairs), + pairs_keys_values(Pairs, CGraph, Weights). + + +interpretation_graph(Tags, Goal, Scheme, IGraph, Weights) :- + findall(i(Tag,C)-Score, (member(Score-Tag, Tags), + call(Goal, Tag, C), + once(rdf(C, skos:inScheme, Scheme)) + ), + Pairs0), + sort(Pairs0, Pairs), + pairs_keys_values(Pairs, IGraph, Weights). + +graph_cluster(Vector, Clusters) :- + r_open, + r_lib(igraph), + v <- Vector, + g <- graph(v), + Clusters <- 'clusters(g)$membership', + r_close. + +page_rank(Vector, Weights, Rank) :- + r_open, + r_lib(igraph), + v <- Vector, + w <- Weights, + g <- graph(v), + %r_print( g ), + Rank <- 'page.rank(g, weights = w)$vector', + %r_in( 'page.rank'(g, vids = 'V'(g), directed = 'TRUE', damping = 0.2, + % weights = 'NULL', options = 'igraph.arpack.default') ), + r_close. + +show_graph(Vector) :- + r_open, + r_lib(igraph), + v <- Vector, + g <- graph(v), + r_print( g ), + r_in( tkplot( g ) ), + write( 'Press Return to continue...' ), nl, + read_line_to_codes( user_input, _ ), + r_print( 'dev.off()' ), + r_close. + + +expand_graph(C, C1, 0.1) :- + rdf(C, skos:related, C1). +expand_graph(C, C1, 0.1) :- + rdf(C, skos:broader, C1). +expand_graph(C1, C, 0.1) :- + rdf_reachable(C1, skos:broader, C). + + + +%% graph_rename(+Graph, -Assoc, -NewGraph) +% +% Rename nodes so that they start counted by 1. + +graph_rename(Graph, Assoc, NewGraph) :- + empty_assoc(Assoc0), + rename_vertices(Graph, Assoc0, 1, NewGraph, Assoc). + +rename_vertices([], Assoc, _, [], Assoc). +rename_vertices([i(C1,C2)|T], Assoc0, N, [NewC1,NewC2|Rest], Assoc) :- + rename_vertex(C1, Assoc0, N, Assoc1, N1, NewC1), + rename_vertex(C2, Assoc1, N1, Assoc2, N2, NewC2), + rename_vertices(T, Assoc2, N2, Rest, Assoc). + +rename_vertex(C, Assoc, N, Assoc, N, New) :- + get_assoc(C, Assoc, New), + !. +rename_vertex(C, Assoc, N, Assoc1, N1, N) :- + N1 is N+1, + put_assoc(C, Assoc, N, Assoc1). + + +v_count(Graph, Count) :- + findall(V, (member(i(V,_), Graph) + ;member(i(_,V), Graph) + ), + Vs0), + sort(Vs0, Vs), + length(Vs, Count).