:- module(tag_cluster, [tag_cluster/3 ]). :- use_module(library(semweb/rdf_db)). :- use_module(library(semweb/rdfs)). :- use_module(library(real)). :- use_module(library(tfidf)). :- use_module(api(reconcile)). tag_cluster(Video, Clusters, TopClusterTags) :- tag_rank(Video, RankedTags), pairs_values(RankedTags, Tags), length(RankedTags, Tag_Count), debug(semrank, 'tags: ~w', [Tag_Count]), tag_interpretations(RankedTags, Interpretations), list_to_assoc(Interpretations, Assoc), pairs_keys(Interpretations, TagsWithI0), sort(TagsWithI0, TagsWithI), length(TagsWithI, TagsWithI_Count), debug(semrank, 'tags with interpretation: ~w', [TagsWithI_Count]), tag_distance_matrix(Tags, Tags, Assoc, Matrix), tag_clustering(Matrix, ClusterIndex), pairs_keys_values(Membership0, ClusterIndex, RankedTags), keysort(Membership0, Membership), group_pairs_by_key(Membership, Clusters), maplist(top_tag, Clusters, TopClusterTags0), keysort(TopClusterTags0, TopClusterTags1), reverse(TopClusterTags1, TopClusterTags). top_tag(_-[Tag|_], Tag). tag_clustering(Matrix, Clusters) :- m <- Matrix, Clusters <- 'kmeans(m, 15)$cluster'. tag_distance_matrix([], _, _, []). tag_distance_matrix([Tag|Ts], Tags, Assoc, [Row|Rows]) :- tag_distances(Tags, Tag, Assoc, Row), tag_distance_matrix(Ts, Tags, Assoc, Rows). tag_distances([], _, _, []). tag_distances([Tag2|Tags], Tag1, Assoc, [D|Rows]) :- tag_distance(Tag1, Tag2, Assoc, D), tag_distances(Tags, Tag1, Assoc, Rows). tag_distance(T, T, _, 1) :- !. tag_distance(T1, T2, Assoc, Distance) :- findall(D, ( tag_concept(T1,Assoc,C1), tag_concept(T2,Assoc,C2), concept_distance(C1, C2, D) ), Ds), ( Ds = [] -> Distance = 0 ; max_list(Ds, Distance) ). concept_distance(A, A, 1) :- !. concept_distance(A, B, D) :- rdf_reachable(A, skos:broader, C, 3, N1), rdf_reachable(B, skos:broader, C, 3, N2), D is 0.5^(N1+N2). concept_distance(A, B, D) :- rdf_reachable(A, skos:related, B, 2, N), D is 0.5^N. tag_concept(Tag, I, Concept) :- get_assoc(Tag, I, Concepts), member(Concept, Concepts). tag_interpretations([], []). %tag_interpretations([W-_Tag|Tags], Pairs) :- % W < 0.005, % !, % tag_interpretations(Tags, Pairs). tag_interpretations([_W-Tag|Tags], [Tag-Concepts|Pairs]) :- rdf_equal(skos:'Concept', Type), findall(C, (reconcile(Tag, 10, Type, [], Hits), member(hit(D,C,_,_), Hits), D < 6 ), Concepts), tag_interpretations(Tags, Pairs). mbh_cluster_eval_table(V) :- ( m_video(V), rdf(V, dc:id, literal(Id)), video_mbh_terms(V, Terms), length(Terms, TermCount), tag_cluster(V, _Clusters, RankedTags), pairs_values(RankedTags, Tags), ( TermCount = 0 -> format('~w', [Id]) ; intersect(Tags, Terms, Intersect), length(Intersect, I_Count), TopN is I_Count/TermCount, format('~w,~w~n', [Id, TopN]) ), fail ; true ). intersect(Tags, Terms, Intersect) :- maplist(downcase_atom, Tags, Tags1), sort(Tags1, Tags2), ord_intersect(Tags2, Terms, Intersect).