vumix/commit
cleanup some code
author | Michiel Hildebrand |
---|---|
Wed May 16 20:48:41 2012 +0200 | |
committer | Michiel Hildebrand |
Wed May 16 20:48:41 2012 +0200 | |
commit | 8898bc8c7fb378c7314ee2f71d0cb0845915c343 |
tree | f6d919faff8fb7918c42d05384fac1c6cb86078a |
parent | 1ad40dbe516f35025b5c2c353e7c461ef87fa7b9 |
Diff style: patch stat
diff --git a/applications/vumix_p0.pl b/applications/vumix_p0.pl index bb40108..80331eb 100644 --- a/applications/vumix_p0.pl +++ b/applications/vumix_p0.pl @@ -29,7 +29,7 @@ :- use_module(library(yaz_util)). :- use_module(library(stop_words)). :- use_module(library(tfidf)). -:- use_module(library(find_resource)). +:- use_module(library(tag_concept)). /*************************************************** @@ -168,15 +168,6 @@ link_tags_to_concepts([_|As], Rest) :- html_page(Target, Fields, Concepts). */ -remove_stop_words([], _, []). -remove_stop_words([_Rank-Tag0|Tags], Lang, Filtered) :- - downcase_atom(Tag0, Tag), - stop_word(Lang, Tag), - !, - remove_stop_words(Tags, Lang, Filtered). -remove_stop_words([Tag|Tags], Lang, [Tag|Filtered]) :- - remove_stop_words(Tags, Lang, Filtered). - /* %% link_tags_to_concepts(+Annotations, -Annotations1) @@ -494,6 +485,8 @@ js_annotation_field(FieldURI, Target) --> gtaa_concept_with_tag(r,r,-). +key_score(R, 1-R). + %% http_vumix_overview(+Request) % % @@ -512,11 +505,23 @@ http_vumix_overview(Request) :- tags_with_gtaa_concepts(FilteredTags, TagsWithConcept), gtaa_concepts(FilteredTags, gtaa:'GTAA', Concepts), - gtaa_concepts(FilteredTags, gtaa:'OnderwerpenBenG', Onderwerpen), + %gtaa_concepts(FilteredTags, gtaa:'OnderwerpenBenG', Onderwerpen), + Goal = tag_concept(stem(0.9)), + %Goal = tag_concept(exact), + derived_concepts(FilteredTags, Goal, gtaa:'OnderwerpenBenG', Onderwerpen0), + maplist(key_score, Onderwerpen0, Onderwerpen), + gtaa_classifications(Onderwerpen, Classes0), sort(Classes0, Classes1), group_pairs_by_key(Classes1, Classes), + video_concepts(Target, gtaa:'GTAA', Controlled0), + maplist(key_score, Controlled0, Controlled), + gtaa_classifications(Controlled, C_Classes0), + sort(C_Classes0, C_Classes1), + group_pairs_by_key(C_Classes1, C_Classes), + + length(Controlled, ControlledCount), length(Tags0, TagCount), length(FilteredTags, TagFilteredCount), length(TagsWithConcept, TagWithConceptCount), @@ -533,7 +538,11 @@ http_vumix_overview(Request) :- a(href(Target), Target) ]), div(id(bd), - [ div(id(tags), + [ div(id(controlled), + [ h4(['GTAA controlled concepts (',ControlledCount,')']), + ul(\html_classes(C_Classes)) + ]), + div(id(tags), [ h4(['Tags (',TagCount,')']), \html_tags(Tags0) ]), @@ -569,8 +578,9 @@ html_tags([Score0-Tag|T]) --> html_concepts([]) --> !. -html_concepts([Score0-Concept|T]) --> - { rdf_display_label(Concept, Label), +html_concepts([C|T]) --> + { c(C, Concept, Score0), + rdf_display_label(Concept, Label), Score is round(Score0) }, html(li([Label, @@ -578,6 +588,9 @@ html_concepts([Score0-Concept|T]) --> div(Concept)])), html_concepts(T). +c(Score-Concept, Concept, Score) :- !. +c(Concept, Concept, 1). + html_classes([]) --> !. html_classes([Class-Concepts|T]) --> { rdf_display_label(Class, Label), @@ -616,6 +629,7 @@ gtaa_concepts(Tags, Scheme, Concepts) :- gtaa_concepts_([], _, []). gtaa_concepts_([Score-Tag|As], Scheme, [Score-Concept|Rest]) :- + tag_concept(stem(0.8), Tag, Concept), %snowball(dutch, Tag, Stem), %rdf_has(Concept,rdfs:label,literal(prefix(Stem),L)), %literal_text(L,Lit), @@ -623,7 +637,6 @@ gtaa_concepts_([Score-Tag|As], Scheme, [Score-Concept|Rest]) :- %atom_length(Tag,TN), %Diff is abs(LN-TN), %Diff < 5, - rdf_has(Concept,rdfs:label,literal(exact(Tag),_)), rdf(Concept,skos:inScheme, Scheme), !, gtaa_concepts_(As, Scheme, Rest). @@ -748,307 +761,3 @@ rel_concept(C,C). - - /******************************* - * experiment * - *******************************/ - - -baseline_video(V) :- - rdf(V, rdf:type, pprime:'Baseline'), - once(rdf(V, dc:subject, _)). - -derived_concepts(Tags, Goal, Scheme, Concepts) :- - findall(C, (member(_-Tag, Tags), - call(Goal, Tag, C), - once(rdf(C, skos:inScheme, Scheme)) - ), - Concepts0), - sort(Concepts0, Concepts). - -derived_ranked_concepts(Tags, Goal, Scheme, Ranked) :- - findall(C-Tag, (member(_-Tag, Tags), - call(Goal, Tag, C), - once(rdf(C, skos:inScheme, Scheme)) - ), - Pairs0), - keysort(Pairs0, Pairs), - group_pairs_by_key(Pairs, Groups), - pairs_sort_by_value_count(Groups, Ranked). - -tag_concept(exact, Tag, Concept) :- - rdf_has(Concept, rdfs:label, literal(exact(Tag),_)), - rdf(Concept, skos:inScheme, gtaa:'GTAA'). - -tag_concept(Stem, Tag, Concept) :- - ( Stem = stem(Diff) - -> true - ; Stem = stem - -> Diff = 0.9 - ), - snowball(dutch, Tag, TagStem), - rdf_has(Concept,rdfs:label, literal(prefix(TagStem), Lit)), - rdf(Concept, skos:inScheme, gtaa:'GTAA'), - literal_text(Lit, Label), - isub(TagStem, Label, true, Sim), - Sim > Diff. - -tag_concept(sub, Tag, Concept) :- - length(Results, 3), - snowball(dutch, Tag, TagStem), - find_resource_by_name(TagStem, Hits, [match(prefix),distance(true)]), - length(Hits, N), - ( N =< 3 - -> Results = Hits - ; append(Results, _, Hits) - ), - member(hit(_D,Concept,_,_), Hits), - rdf(Concept,skos:inScheme, gtaa:'GTAA'). - - - -tag_related_concept(StringMatch, Tag, Concept) :- - tag_concept(StringMatch, Tag, C1), - ( Concept = C1 - ; related(C1, Concept) - ). - -tag_tree_concept(StringMatch, Tag, Concept) :- - tag_concept(StringMatch, Tag, C1), - ( Concept = C1 - ; tree(C1, Concept) - ). - -tag_tree_and_related_concept(StringMatch, Tag, Concept) :- - tag_concept(StringMatch, Tag, C1), - ( C1 = Concept - ; related(C1, Concept) - ; tree(C1, Concept) - ). - -tag_tree_related_concept(StringMatch, Tag, Concept) :- - tag_concept(StringMatch, Tag, C1), - ( C1 = Concept - ; related_tree(C1, Concept) - ; tree_related(C1, Concept) - ). - -tag_tree_related_sibbling_concept(StringMatch, Tag, Concept) :- - tag_concept(StringMatch, Tag, C1), - ( C1 = Concept - ; related_tree(C1, Concept) - ; tree_related(C1, Concept) - ; sibbling(C1, Concept) - ). - -related_tree(C, Concept) :- - related(C, C1), - ( C1 = Concept - ; tree(C1, Concept) - ). - -tree_related(C, Concept) :- - tree(C, C1), - ( C1 = Concept - ; related(C1, Concept) - ). - -tree(C1, C2) :- - ( rdf_reachable(C1, skos:broader, C2) - %; rdf_reachable(C2, skos:broader, C1) - ). - -sibbling(C1, C2) :- - rdf(C1, skos:broader, P), - rdf(C2, skos:broader, P). - -related(C1, C2) :- - rdf(C1, skos:related, C2). - -baseline_eval([], _, _, []). -baseline_eval([C|Cs], Video, Scheme, [C|Rest]) :- - rdf(Video, dc:subject, C), - rdf(C, skos:inScheme, Scheme), - !, - baseline_eval(Cs, Video, Scheme, Rest). -baseline_eval([_|Cs], Video, Scheme, Rest) :- - baseline_eval(Cs, Video, Scheme, Rest). - -stats_table(Type) :- - scheme_alias(Type, Scheme), - format('video\tt\tt_st\tc_ex\tc_st\tc_st_r\tbase~n',[]), - ( baseline_video(V), - rdf(V, dc:id, literal(Id)), - video_concepts(V, Scheme, Concepts), - findall(1-T, video_tag(V, literal(T)), Tags), - remove_stop_words(Tags, dutch, Tags1), - derived_concepts(Tags1, tag_concept(exact), Scheme, I_ExactMatch), - derived_concepts(Tags1, tag_concept(stem), Scheme, I_StemMatch), - derived_concepts(Tags1, tag_tree_and_related_concept(stem), Scheme, I_StemTR), - length(Concepts, ConceptCount), - length(Tags, TagCount), - length(Tags1, TagFilteredCount), - length(I_ExactMatch, ExactCount), - length(I_StemMatch, StemCount), - length(I_StemTR, StemTRCount), - format('~w,\t~w,\t~w,\t~w,\t~w,\t~w,\t~w\t', [Id, TagCount, TagFilteredCount, ExactCount, StemCount, StemTRCount, ConceptCount]), - concept_eval(V, Tags1, Scheme, tag_concept(exact)), - concept_eval(V, Tags1, Scheme, tag_concept(stem)), - concept_eval(V, Tags1, Scheme, tag_tree_and_related_concept(stem)), - - format('~n'), - fail - ; true - ). - -eval_table(Type) :- - scheme_alias(Type, Scheme), - ( baseline_video(V), - rdf(V, dc:id, literal(Id)), - findall(1-T, video_tag(V, literal(T)), Tags), - remove_stop_words(Tags, dutch, Tags1), - video_concepts(V, Scheme, Concepts), - length(Tags1, TagCount), - length(Concepts, ConceptCount), - format('~w, ~w\t ~w, ', [Id, TagCount, ConceptCount]), - concept_eval(V, Tags1, Scheme, tag_concept(exact)), - concept_eval(V, Tags1, Scheme, tag_related_concept(exact)), - concept_eval(V, Tags1, Scheme, tag_tree_concept(exact)), - concept_eval(V, Tags1, Scheme, tag_tree_related_concept(exact)), - concept_eval(V, Tags1, Scheme, tag_tree_and_related_concept(exact)), - concept_eval(V, Tags1, Scheme, tag_tree_related_sibbling_concept(exact)), - format(' '), - concept_eval(V, Tags1, Scheme, tag_concept(stem)), - concept_eval(V, Tags1, Scheme, tag_related_concept(stem)), - concept_eval(V, Tags1, Scheme, tag_tree_concept(stem)), - concept_eval(V, Tags1, Scheme, tag_tree_and_related_concept(stem)), - concept_eval(V, Tags1, Scheme, tag_tree_related_concept(stem)), - concept_eval(V, Tags1, Scheme, tag_tree_related_sibbling_concept(stem)), - %format(' '), - %concept_eval(V, tag_concept(sub)), - %concept_eval(V, tag_related_concept(sub)), - %concept_eval(V, tag_tree_concept(sub)), - %concept_eval(V, tag_tree_related_concept(sub)), - format('~n'), - fail - ; true - ). - -topN_eval_table(Type, N) :- - scheme_alias(Type, Scheme), - ( baseline_video(V), - rdf(V, dc:id, literal(Id)), - video_concepts(V, Scheme, Concepts), - video_tags(V, Tags), - remove_stop_words(Tags, dutch, Tags1), - - length(Concepts, ConceptCount), - length(Tags, TagCount), - format('~w,\t~w,\t~w, ', [Id, TagCount, ConceptCount]), - - - topN_concept_eval(V, Tags1, Scheme, N, tag_concept(exact)), - topN_concept_eval(V, Tags1, Scheme, N, tag_related_concept(exact)), - topN_concept_eval(V, Tags1, Scheme, N, tag_tree_concept(exact)), - topN_concept_eval(V, Tags1, Scheme, N, tag_tree_related_concept(exact)), - topN_concept_eval(V, Tags1, Scheme, N, tag_tree_and_related_concept(exact)), - topN_concept_eval(V, Tags1, Scheme, N, tag_tree_related_sibbling_concept(exact)), - format(' '), - topN_concept_eval(V, Tags1, Scheme, N, tag_concept(stem)), - topN_concept_eval(V, Tags1, Scheme, N, tag_related_concept(stem)), - topN_concept_eval(V, Tags1, Scheme, N, tag_tree_concept(stem)), - topN_concept_eval(V, Tags1, Scheme, N, tag_tree_and_related_concept(stem)), - topN_concept_eval(V, Tags1, Scheme, N, tag_tree_related_concept(stem)), - topN_concept_eval(V, Tags1, Scheme, N, tag_tree_related_sibbling_concept(stem)), - format('~n'), - fail - ; true - ). - -video_tags(Video, Tags) :- - findall(1-T, video_tag(Video, literal(T)), Tags). - -video_concepts(Video, Scheme, Concepts) :- - findall(C, (rdf(Video, dc:subject, C), - rdf(C, skos:inScheme, Scheme) - ), Concepts0), - sort(Concepts0, Concepts). - - -scheme_alias(all, Scheme) :- - rdf_equal(Scheme, gtaa:'GTAA'). -scheme_alias(persons, Scheme) :- - rdf_equal(Scheme, gtaa:'Peroonsnamen'). -scheme_alias(places, Scheme) :- - rdf_equal(Scheme, gtaa:'GeografischeNamen'). -scheme_alias(subjects, Scheme) :- - rdf_equal(Scheme, gtaa:'OnderwerpenBenG'). -scheme_alias(names, Scheme) :- - rdf_equal(Scheme, gtaa:'Namen'). - - -concept_eval(V, Tags, Scheme, Goal) :- - derived_concepts(Tags, Goal, Scheme, Derived), - baseline_eval(Derived,V,Scheme,Eval), - %length(Derived,DerivedCount), - length(Eval,EvalCount), -%format('~w, ', [EvalCount]). - format('~w,\t', [EvalCount]). - - -topN_concept_eval(V, Tags, Scheme, N, Goal) :- - derived_ranked_concepts(Tags, Goal, Scheme, Ranked), - length(Ranked, Count), - ( Count < N - -> Top = Ranked - ; length(Top, N), - append(Top, _, Ranked) - ), - pairs_values(Ranked, Ranked1), - pairs_values(Top, Top1), - baseline_eval(Ranked1, V, Scheme, Eval), - baseline_eval(Top1, V, Scheme, InTopN), - length(Eval, EvalN), - length(InTopN, TopN), - format('~w, ~w, ', [EvalN, TopN]). - - - -%extend_gtaa_hierarchy :- - -gtaa_wn_hierarchy(GTAA, GTAA_Parent) :- - rdf(GTAA, skos:exactMatch, WN), - rdf_reachable(WN, skos:broader, WN_Parent), - WN_Parent \== WN, - rdf(GTAA_Parent, skos:exactMatch, WN_Parent). - %rdf_assert(GTAA, skos:broader, GTAA_Parent, gtaa_wordnet_broader). - - - -list_concepts(Id, Type, Goal) :- - scheme_alias(Type, Scheme), - rdf(V, dc:id, literal(Id)), - video_concepts(V, Scheme, Concepts), - format('prof. annotations~n'), - ( member(C, Concepts), - display_label(C, L), - format('~w~n', [L]), - fail - ; true - ), - format('~n derived concepts~n'), - video_tags(V, Tags), - remove_stop_words(Tags, dutch, Tags1), - derived_concepts(Tags1, Goal, Scheme, Derived), - ( member(C, Derived), - display_label(C, L), - format('~w ~n', [L]), - fail - ; true - ), - format('~n User tags~n'), - ( member(_-T, Tags1), - format('~w ~n', [T]), - fail - ; true - ). diff --git a/lib/beng_xml.pl b/lib/beng_xml.pl index e41e14e..8271939 100644 --- a/lib/beng_xml.pl +++ b/lib/beng_xml.pl @@ -194,10 +194,11 @@ link_beng_waisda([expressie(Dir,Date0,Ss)|T], Linked) :- ; FN == SN -> debug(beng_link, 'linked: ~w ~w', [Dir, Date0]), pairs_keys_values(Pairs, Fragments, Selections) - ; FN is SN-1 % last item seems to be missing (ons kent ons) - -> debug(beng_link, 'removed last element: ~w ~w', [Dir, Date0]), - append(Selections1, [_], Selections), - pairs_keys_values(Pairs, Fragments, Selections1) + % do something smarter here :) + %; FN is SN-1 % last item seems to be missing (ons kent ons) + %-> debug(beng_link, 'removed last element: ~w ~w', [Dir, Date0]), + % append(Selections1, [_], Selections), + % pairs_keys_values(Pairs, Fragments, Selections1) ; debug(beng_link, 'unmatching count: ~w ~w ~w ~w', [Dir, Date0, FN, SN]), Pairs = [] ), diff --git a/lib/r_exp.pl b/lib/r_exp.pl new file mode 100644 index 0000000..d0d9d0e --- /dev/null +++ b/lib/r_exp.pl @@ -0,0 +1,43 @@ +:- module(r_exp, + []). + +:- use_module(library(real)). +:- use_module(library(tag_concept)). +:- use_module(library(video_annotation)). + +%v_compare(V1, V2) :- +% video_gtaa_vector(V1, L1), +% video_gtaa_vector(V2, L2), +% r_lib(lsa), +% v1 <- L1, +% v2 <- L2, +% cosine(v1, v2). + +video_gtaa_vector(V, Vector) :- + rdf_equal(gtaa:'OnderwerpenBenG', Scheme), + video_tags(V, Tags), + remove_stop_words(Tags, dutch, Tags1), + derived_concepts(Tags1, tag_concept(exact), Scheme, Cs0), + sort(Cs0, Cs), + gtaa_vector(Cs, Scheme, Vector). + + +gtaa_vector(Concepts, Scheme, Vector) :- + findall(C, rdf(C, skos:inScheme, Scheme), Cs0), + sort(Cs0, Cs), + list_to_vector(Concepts, Cs, Vector). + +list_to_vector([], [], []). +list_to_vector([H|T1], [H|T2], [1|V]) :- + !, + list_to_vector(T1, T2, V). +list_to_vector(T1, [_|T2], [0|V]) :- + list_to_vector(T1, T2, V). + +rtest :- + y <- rnorm(50), + <- y, + x <- rnorm(y), + <- x11(width=5,height=3.5), + <- plot(x,y), + devoff. diff --git a/lib/tag_concept.pl b/lib/tag_concept.pl new file mode 100644 index 0000000..a840175 --- /dev/null +++ b/lib/tag_concept.pl @@ -0,0 +1,342 @@ +:- module(tag_concept, + [tag_concept/3, + video_tags/2, + video_concepts/3, + derived_concepts/4, + remove_stop_words/3 + ]). + + +:- use_module(library(semweb/rdf_db)). +:- use_module(library(semweb/rdfs)). +:- use_module(library(semweb/rdf_label)). +:- use_module(library(yaz_util)). +:- use_module(library(stop_words)). +:- use_module(library(tfidf)). +:- use_module(library(find_resource)). +:- use_module(library(video_annotation)). + +:- rdf_meta + derived_concepts(+, :, r, -), + video_concepts(r, r, -). + + + /******************************* + * experiment * + *******************************/ + + +baseline_video(V) :- + rdf(V, rdf:type, pprime:'Baseline'), + once(rdf(V, dc:subject, _)). + +derived_concepts(Tags, Goal, Scheme, Concepts) :- + findall(C, (member(_Score-Tag, Tags), + call(Goal, Tag, C), + once(rdf(C, skos:inScheme, Scheme)) + ), + Concepts0), + sort(Concepts0, Concepts). + +derived_ranked_concepts(Tags, Goal, Scheme, Ranked) :- + findall(C-Tag, (member(_-Tag, Tags), + call(Goal, Tag, C), + once(rdf(C, skos:inScheme, Scheme)) + ), + Pairs0), + keysort(Pairs0, Pairs), + group_pairs_by_key(Pairs, Groups), + pairs_sort_by_value_count(Groups, Ranked). + +tag_concept(exact, Tag, Concept) :- + rdf_has(Concept, rdfs:label, literal(exact(Tag),_)), + rdf(Concept, skos:inScheme, gtaa:'GTAA'). + +tag_concept(Stem, Tag, Concept) :- + ( Stem = stem(Diff) + -> true + ; Stem = stem + -> Diff = 0.9 + ), + snowball(dutch, Tag, TagStem), + rdf_has(Concept,rdfs:label, literal(prefix(TagStem), Lit)), + rdf(Concept, skos:inScheme, gtaa:'GTAA'), + literal_text(Lit, Label), + ( Label = Tag + -> true + ; snowball(dutch, Label, LabelStem), + find_resource:literal_distance(TagStem, LabelStem, D), + %isub(TagStem, Label, true, Sim), + D < 5 + %Sim > Diff. + ). + +tag_concept(sub, Tag, Concept) :- + length(Results, 3), + snowball(dutch, Tag, TagStem), + find_resource_by_name(TagStem, Hits, [match(prefix),distance(true)]), + length(Hits, N), + ( N =< 3 + -> Results = Hits + ; append(Results, _, Hits) + ), + member(hit(_D,Concept,_,_), Hits), + rdf(Concept,skos:inScheme, gtaa:'GTAA'). + + + +tag_related_concept(StringMatch, Tag, Concept) :- + tag_concept(StringMatch, Tag, C1), + ( Concept = C1 + ; related(C1, Concept) + ). + +tag_tree_concept(StringMatch, Tag, Concept) :- + tag_concept(StringMatch, Tag, C1), + ( Concept = C1 + ; tree(C1, Concept) + ). + +tag_tree_and_related_concept(StringMatch, Tag, Concept) :- + tag_concept(StringMatch, Tag, C1), + ( C1 = Concept + ; related(C1, Concept) + ; tree(C1, Concept) + ). + +tag_tree_related_concept(StringMatch, Tag, Concept) :- + tag_concept(StringMatch, Tag, C1), + ( C1 = Concept + ; related_tree(C1, Concept) + ; tree_related(C1, Concept) + ). + +tag_tree_related_sibbling_concept(StringMatch, Tag, Concept) :- + tag_concept(StringMatch, Tag, C1), + ( C1 = Concept + ; related_tree(C1, Concept) + ; tree_related(C1, Concept) + ; sibbling(C1, Concept) + ). + +related_tree(C, Concept) :- + related(C, C1), + ( C1 = Concept + ; tree(C1, Concept) + ). + +tree_related(C, Concept) :- + tree(C, C1), + ( C1 = Concept + ; related(C1, Concept) + ). + +tree(C1, C2) :- + ( rdf_reachable(C1, skos:broader, C2) + ; rdf_reachable(C2, skos:broader, C1) + ). + +sibbling(C1, C2) :- + rdf(C1, skos:broader, P), + rdf(C2, skos:broader, P). + +related(C1, C2) :- + rdf(C1, skos:related, C2). + +baseline_eval([], _, _, []). +baseline_eval([C|Cs], Video, Scheme, [C|Rest]) :- + rdf(Video, dc:subject, C), + rdf(C, skos:inScheme, Scheme), + !, + baseline_eval(Cs, Video, Scheme, Rest). +baseline_eval([_|Cs], Video, Scheme, Rest) :- + baseline_eval(Cs, Video, Scheme, Rest). + +stats_table(Type) :- + scheme_alias(Type, Scheme), + format('video\tt\tt_st\tc_ex\tc_st\tc_st_r\tbase~n',[]), + ( baseline_video(V), + rdf(V, dc:id, literal(Id)), + video_concepts(V, Scheme, Concepts), + findall(1-T, video_tag(V, literal(T)), Tags), + remove_stop_words(Tags, dutch, Tags1), + derived_concepts(Tags1, tag_concept(exact), Scheme, I_ExactMatch), + derived_concepts(Tags1, tag_concept(stem), Scheme, I_StemMatch), + derived_concepts(Tags1, tag_tree_and_related_concept(stem), Scheme, I_StemTR), + length(Concepts, ConceptCount), + length(Tags, TagCount), + length(Tags1, TagFilteredCount), + length(I_ExactMatch, ExactCount), + length(I_StemMatch, StemCount), + length(I_StemTR, StemTRCount), + format('~w,\t~w,\t~w,\t~w,\t~w,\t~w,\t~w\t', [Id, TagCount, TagFilteredCount, ExactCount, StemCount, StemTRCount, ConceptCount]), + concept_eval(V, Tags1, Scheme, tag_concept(exact)), + concept_eval(V, Tags1, Scheme, tag_concept(stem)), + concept_eval(V, Tags1, Scheme, tag_tree_and_related_concept(stem)), + + format('~n'), + fail + ; true + ). + +eval_table(Type) :- + scheme_alias(Type, Scheme), + ( baseline_video(V), + rdf(V, dc:id, literal(Id)), + findall(1-T, video_tag(V, literal(T)), Tags), + remove_stop_words(Tags, dutch, Tags1), + video_concepts(V, Scheme, Concepts), + length(Tags1, TagCount), + length(Concepts, ConceptCount), + format('~w, ~w, ~w, ', [Id, TagCount, ConceptCount]), + concept_eval(V, Tags1, Scheme, tag_concept(exact)), + concept_eval(V, Tags1, Scheme, tag_related_concept(exact)), + concept_eval(V, Tags1, Scheme, tag_tree_concept(exact)), + concept_eval(V, Tags1, Scheme, tag_tree_related_concept(exact)), + concept_eval(V, Tags1, Scheme, tag_tree_and_related_concept(exact)), + concept_eval(V, Tags1, Scheme, tag_tree_related_sibbling_concept(exact)), + format(' '), + concept_eval(V, Tags1, Scheme, tag_concept(stem)), + concept_eval(V, Tags1, Scheme, tag_related_concept(stem)), + concept_eval(V, Tags1, Scheme, tag_tree_concept(stem)), + concept_eval(V, Tags1, Scheme, tag_tree_and_related_concept(stem)), + concept_eval(V, Tags1, Scheme, tag_tree_related_concept(stem)), + concept_eval(V, Tags1, Scheme, tag_tree_related_sibbling_concept(stem)), + %format(' '), + %concept_eval(V, tag_concept(sub)), + %concept_eval(V, tag_related_concept(sub)), + %concept_eval(V, tag_tree_concept(sub)), + %concept_eval(V, tag_tree_related_concept(sub)), + format('~n'), + fail + ; true + ). + +topN_eval_table(Type, N) :- + scheme_alias(Type, Scheme), + ( baseline_video(V), + rdf(V, dc:id, literal(Id)), + video_concepts(V, Scheme, Concepts), + video_tags(V, Tags), + remove_stop_words(Tags, dutch, Tags1), + + length(Concepts, ConceptCount), + length(Tags, TagCount), + format('~w,\t~w,\t~w, ', [Id, TagCount, ConceptCount]), + + + topN_concept_eval(V, Tags1, Scheme, N, tag_concept(exact)), + topN_concept_eval(V, Tags1, Scheme, N, tag_related_concept(exact)), + topN_concept_eval(V, Tags1, Scheme, N, tag_tree_concept(exact)), + topN_concept_eval(V, Tags1, Scheme, N, tag_tree_related_concept(exact)), + topN_concept_eval(V, Tags1, Scheme, N, tag_tree_and_related_concept(exact)), + topN_concept_eval(V, Tags1, Scheme, N, tag_tree_related_sibbling_concept(exact)), + format(' '), + topN_concept_eval(V, Tags1, Scheme, N, tag_concept(stem)), + topN_concept_eval(V, Tags1, Scheme, N, tag_related_concept(stem)), + topN_concept_eval(V, Tags1, Scheme, N, tag_tree_concept(stem)), + topN_concept_eval(V, Tags1, Scheme, N, tag_tree_and_related_concept(stem)), + topN_concept_eval(V, Tags1, Scheme, N, tag_tree_related_concept(stem)), + topN_concept_eval(V, Tags1, Scheme, N, tag_tree_related_sibbling_concept(stem)), + format('~n'), + fail + ; true + ). + +video_tags(Video, Tags) :- + findall(1-T, video_tag(Video, literal(T)), Tags). + +video_concepts(Video, Scheme, Concepts) :- + findall(C, (rdf(Video, dc:subject, C), + rdf(C, skos:inScheme, Scheme) + ), Concepts0), + sort(Concepts0, Concepts). + + +scheme_alias(all, Scheme) :- + rdf_equal(Scheme, gtaa:'GTAA'). +scheme_alias(persons, Scheme) :- + rdf_equal(Scheme, gtaa:'Peroonsnamen'). +scheme_alias(places, Scheme) :- + rdf_equal(Scheme, gtaa:'GeografischeNamen'). +scheme_alias(subjects, Scheme) :- + rdf_equal(Scheme, gtaa:'OnderwerpenBenG'). +scheme_alias(names, Scheme) :- + rdf_equal(Scheme, gtaa:'Namen'). + + +concept_eval(V, Tags, Scheme, Goal) :- + derived_concepts(Tags, Goal, Scheme, Derived), + baseline_eval(Derived,V,Scheme,Eval), + length(Derived,DerivedCount), + length(Eval,EvalCount), + %format('~w, ', [EvalCount]). + format('~w,~w,', [EvalCount,DerivedCount]). + + +topN_concept_eval(V, Tags, Scheme, N, Goal) :- + derived_ranked_concepts(Tags, Goal, Scheme, Ranked), + length(Ranked, Count), + ( Count < N + -> Top = Ranked + ; length(Top, N), + append(Top, _, Ranked) + ), + pairs_values(Ranked, Ranked1), + pairs_values(Top, Top1), + baseline_eval(Ranked1, V, Scheme, Eval), + baseline_eval(Top1, V, Scheme, InTopN), + length(Eval, EvalN), + length(InTopN, TopN), + format('~w, ~w, ', [EvalN, TopN]). + + + +%extend_gtaa_hierarchy :- + +gtaa_wn_hierarchy(GTAA, GTAA_Parent) :- + rdf(GTAA, skos:exactMatch, WN), + rdf_reachable(WN, skos:broader, WN_Parent), + WN_Parent \== WN, + rdf(GTAA_Parent, skos:exactMatch, WN_Parent). + %rdf_assert(GTAA, skos:broader, GTAA_Parent, gtaa_wordnet_broader). + + + +list_concepts(Id, Type, Goal) :- + scheme_alias(Type, Scheme), + rdf(V, dc:id, literal(Id)), + video_concepts(V, Scheme, Concepts), + format('prof. annotations~n'), + ( member(C, Concepts), + display_label(C, L), + format('~w~n', [L]), + fail + ; true + ), + format('~n derived concepts~n'), + video_tags(V, Tags), + remove_stop_words(Tags, dutch, Tags1), + derived_concepts(Tags1, Goal, Scheme, Derived), + ( member(C, Derived), + display_label(C, L), + format('~w ~n', [L]), + fail + ; true + ), + format('~n User tags~n'), + ( member(_-T, Tags1), + format('~w ~n', [T]), + fail + ; true + ). + + +remove_stop_words([], _, []). +remove_stop_words([_Rank-Tag0|Tags], Lang, Filtered) :- + downcase_atom(Tag0, Tag), + stop_word(Lang, Tag), + !, + remove_stop_words(Tags, Lang, Filtered). +remove_stop_words([Tag|Tags], Lang, [Tag|Filtered]) :- + remove_stop_words(Tags, Lang, Filtered).