vumix/commit
continue epxlorations
author | Michiel Hildebrand |
---|---|
Tue May 15 19:00:36 2012 +0200 | |
committer | Michiel Hildebrand |
Tue May 15 19:00:36 2012 +0200 | |
commit | 1ad40dbe516f35025b5c2c353e7c461ef87fa7b9 |
tree | 3865e405777e8b93340fd782f6517f9ef14b93fb |
parent | fe3a5f0caa4ebcd0fb72b4c5b10c48c62cbdcc9d |
Diff style: patch stat
diff --git a/applications/vumix_p0.pl b/applications/vumix_p0.pl index 7fb0721..bb40108 100644 --- a/applications/vumix_p0.pl +++ b/applications/vumix_p0.pl @@ -758,16 +758,18 @@ baseline_video(V) :- rdf(V, rdf:type, pprime:'Baseline'), once(rdf(V, dc:subject, _)). -derived_concepts(Tags, Goal, Concepts) :- +derived_concepts(Tags, Goal, Scheme, Concepts) :- findall(C, (member(_-Tag, Tags), - call(Goal, Tag, C) + call(Goal, Tag, C), + once(rdf(C, skos:inScheme, Scheme)) ), Concepts0), sort(Concepts0, Concepts). -derived_ranked_concepts(Tags, Goal, Ranked) :- +derived_ranked_concepts(Tags, Goal, Scheme, Ranked) :- findall(C-Tag, (member(_-Tag, Tags), - call(Goal, Tag, C) + call(Goal, Tag, C), + once(rdf(C, skos:inScheme, Scheme)) ), Pairs0), keysort(Pairs0, Pairs), @@ -778,13 +780,18 @@ tag_concept(exact, Tag, Concept) :- rdf_has(Concept, rdfs:label, literal(exact(Tag),_)), rdf(Concept, skos:inScheme, gtaa:'GTAA'). -tag_concept(stem, Tag, Concept) :- +tag_concept(Stem, Tag, Concept) :- + ( Stem = stem(Diff) + -> true + ; Stem = stem + -> Diff = 0.9 + ), snowball(dutch, Tag, TagStem), rdf_has(Concept,rdfs:label, literal(prefix(TagStem), Lit)), rdf(Concept, skos:inScheme, gtaa:'GTAA'), literal_text(Lit, Label), isub(TagStem, Label, true, Sim), - Sim > 0.8. + Sim > Diff. tag_concept(sub, Tag, Concept) :- length(Results, 3), @@ -848,7 +855,7 @@ tree_related(C, Concept) :- tree(C1, C2) :- ( rdf_reachable(C1, skos:broader, C2) - ; rdf_reachable(C2, skos:broader, C1) + %; rdf_reachable(C2, skos:broader, C1) ). sibbling(C1, C2) :- @@ -867,16 +874,41 @@ baseline_eval([C|Cs], Video, Scheme, [C|Rest]) :- baseline_eval([_|Cs], Video, Scheme, Rest) :- baseline_eval(Cs, Video, Scheme, Rest). -eval_table(Type) :- +stats_table(Type) :- scheme_alias(Type, Scheme), + format('video\tt\tt_st\tc_ex\tc_st\tc_st_r\tbase~n',[]), ( baseline_video(V), rdf(V, dc:id, literal(Id)), + video_concepts(V, Scheme, Concepts), findall(1-T, video_tag(V, literal(T)), Tags), remove_stop_words(Tags, dutch, Tags1), - findall(C, (rdf(V, dc:subject, C), - rdf(C, skos:inScheme, Scheme) - ), Concepts), + derived_concepts(Tags1, tag_concept(exact), Scheme, I_ExactMatch), + derived_concepts(Tags1, tag_concept(stem), Scheme, I_StemMatch), + derived_concepts(Tags1, tag_tree_and_related_concept(stem), Scheme, I_StemTR), + length(Concepts, ConceptCount), length(Tags, TagCount), + length(Tags1, TagFilteredCount), + length(I_ExactMatch, ExactCount), + length(I_StemMatch, StemCount), + length(I_StemTR, StemTRCount), + format('~w,\t~w,\t~w,\t~w,\t~w,\t~w,\t~w\t', [Id, TagCount, TagFilteredCount, ExactCount, StemCount, StemTRCount, ConceptCount]), + concept_eval(V, Tags1, Scheme, tag_concept(exact)), + concept_eval(V, Tags1, Scheme, tag_concept(stem)), + concept_eval(V, Tags1, Scheme, tag_tree_and_related_concept(stem)), + + format('~n'), + fail + ; true + ). + +eval_table(Type) :- + scheme_alias(Type, Scheme), + ( baseline_video(V), + rdf(V, dc:id, literal(Id)), + findall(1-T, video_tag(V, literal(T)), Tags), + remove_stop_words(Tags, dutch, Tags1), + video_concepts(V, Scheme, Concepts), + length(Tags1, TagCount), length(Concepts, ConceptCount), format('~w, ~w\t ~w, ', [Id, TagCount, ConceptCount]), concept_eval(V, Tags1, Scheme, tag_concept(exact)), @@ -902,25 +934,46 @@ eval_table(Type) :- ; true ). -topN_eval(Id, N, Type, Goal, TopN) :- +topN_eval_table(Type, N) :- scheme_alias(Type, Scheme), - rdf(V, dc:id, literal(Id)), - ranked_concepts(Id, Goal, Ranked), - length(Ranked, Count), - ( Count < N - -> Results0 = Ranked - ; length(Results0, N), - append(Results0, _, Ranked) - ), - pairs_values(Results0, Results), - baseline_eval(Results, V, Scheme, InTopN), - length(InTopN, TopN). + ( baseline_video(V), + rdf(V, dc:id, literal(Id)), + video_concepts(V, Scheme, Concepts), + video_tags(V, Tags), + remove_stop_words(Tags, dutch, Tags1), + + length(Concepts, ConceptCount), + length(Tags, TagCount), + format('~w,\t~w,\t~w, ', [Id, TagCount, ConceptCount]), + + + topN_concept_eval(V, Tags1, Scheme, N, tag_concept(exact)), + topN_concept_eval(V, Tags1, Scheme, N, tag_related_concept(exact)), + topN_concept_eval(V, Tags1, Scheme, N, tag_tree_concept(exact)), + topN_concept_eval(V, Tags1, Scheme, N, tag_tree_related_concept(exact)), + topN_concept_eval(V, Tags1, Scheme, N, tag_tree_and_related_concept(exact)), + topN_concept_eval(V, Tags1, Scheme, N, tag_tree_related_sibbling_concept(exact)), + format(' '), + topN_concept_eval(V, Tags1, Scheme, N, tag_concept(stem)), + topN_concept_eval(V, Tags1, Scheme, N, tag_related_concept(stem)), + topN_concept_eval(V, Tags1, Scheme, N, tag_tree_concept(stem)), + topN_concept_eval(V, Tags1, Scheme, N, tag_tree_and_related_concept(stem)), + topN_concept_eval(V, Tags1, Scheme, N, tag_tree_related_concept(stem)), + topN_concept_eval(V, Tags1, Scheme, N, tag_tree_related_sibbling_concept(stem)), + format('~n'), + fail + ; true + ). + +video_tags(Video, Tags) :- + findall(1-T, video_tag(Video, literal(T)), Tags). + +video_concepts(Video, Scheme, Concepts) :- + findall(C, (rdf(Video, dc:subject, C), + rdf(C, skos:inScheme, Scheme) + ), Concepts0), + sort(Concepts0, Concepts). -ranked_concepts(Id, Goal, Derived) :- - rdf(V, dc:id, literal(Id)), - findall(1-T, video_tag(V, literal(T)), Tags), - remove_stop_words(Tags, dutch, Tags1), - derived_ranked_concepts(Tags1, Goal, Derived). scheme_alias(all, Scheme) :- rdf_equal(Scheme, gtaa:'GTAA'). @@ -935,12 +988,29 @@ scheme_alias(names, Scheme) :- concept_eval(V, Tags, Scheme, Goal) :- - derived_concepts(Tags, Goal, Derived), - baseline_eval(Derived,V,Scheme,Eval), - length(Derived,DerivedCount), - length(Eval,EvalCount), - %format('~w, ', [EvalCount]). - format('~w, ~w, ', [EvalCount, DerivedCount]). + derived_concepts(Tags, Goal, Scheme, Derived), + baseline_eval(Derived,V,Scheme,Eval), + %length(Derived,DerivedCount), + length(Eval,EvalCount), +%format('~w, ', [EvalCount]). + format('~w,\t', [EvalCount]). + + +topN_concept_eval(V, Tags, Scheme, N, Goal) :- + derived_ranked_concepts(Tags, Goal, Scheme, Ranked), + length(Ranked, Count), + ( Count < N + -> Top = Ranked + ; length(Top, N), + append(Top, _, Ranked) + ), + pairs_values(Ranked, Ranked1), + pairs_values(Top, Top1), + baseline_eval(Ranked1, V, Scheme, Eval), + baseline_eval(Top1, V, Scheme, InTopN), + length(Eval, EvalN), + length(InTopN, TopN), + format('~w, ~w, ', [EvalN, TopN]). @@ -953,3 +1023,32 @@ gtaa_wn_hierarchy(GTAA, GTAA_Parent) :- rdf(GTAA_Parent, skos:exactMatch, WN_Parent). %rdf_assert(GTAA, skos:broader, GTAA_Parent, gtaa_wordnet_broader). + + +list_concepts(Id, Type, Goal) :- + scheme_alias(Type, Scheme), + rdf(V, dc:id, literal(Id)), + video_concepts(V, Scheme, Concepts), + format('prof. annotations~n'), + ( member(C, Concepts), + display_label(C, L), + format('~w~n', [L]), + fail + ; true + ), + format('~n derived concepts~n'), + video_tags(V, Tags), + remove_stop_words(Tags, dutch, Tags1), + derived_concepts(Tags1, Goal, Scheme, Derived), + ( member(C, Derived), + display_label(C, L), + format('~w ~n', [L]), + fail + ; true + ), + format('~n User tags~n'), + ( member(_-T, Tags1), + format('~w ~n', [T]), + fail + ; true + ). diff --git a/lib/beng_xml.pl b/lib/beng_xml.pl index fd12965..e41e14e 100644 --- a/lib/beng_xml.pl +++ b/lib/beng_xml.pl @@ -2,6 +2,7 @@ [overlap/1, beng_stats/0, import_fragments/0, + assert_fragment_annotations/0, pprime_frags_by_year/1 ]). @@ -23,6 +24,12 @@ reeks('357006'). reeks('357007'). +assert_fragment_annotations :- + import_fragments, + overlap(Pairs), + segment_terms(Pairs, Annotations), + assert_fragment_annotations(Annotations). + assert_fragment_annotations(As) :- rdf_equal(gtaa:'OnderwerpenBenG', Onderwerpen), rdf_equal(gtaa:'Persoonsnamen', Persons),