vumix/commit

continue epxlorations

authorMichiel Hildebrand
Tue May 15 19:00:36 2012 +0200
committerMichiel Hildebrand
Tue May 15 19:00:36 2012 +0200
commit1ad40dbe516f35025b5c2c353e7c461ef87fa7b9
tree3865e405777e8b93340fd782f6517f9ef14b93fb
parentfe3a5f0caa4ebcd0fb72b4c5b10c48c62cbdcc9d
Diff style: patch stat
diff --git a/applications/vumix_p0.pl b/applications/vumix_p0.pl
index 7fb0721..bb40108 100644
--- a/applications/vumix_p0.pl
+++ b/applications/vumix_p0.pl
@@ -758,16 +758,18 @@ baseline_video(V) :-
 	rdf(V, rdf:type, pprime:'Baseline'),
 	once(rdf(V, dc:subject, _)).
 
-derived_concepts(Tags, Goal, Concepts) :-
+derived_concepts(Tags, Goal, Scheme, Concepts) :-
 	findall(C, (member(_-Tag, Tags),
-		    call(Goal, Tag, C)
+		    call(Goal, Tag, C),
+		    once(rdf(C, skos:inScheme, Scheme))
 		   ),
 		Concepts0),
 	sort(Concepts0, Concepts).
 
-derived_ranked_concepts(Tags, Goal, Ranked) :-
+derived_ranked_concepts(Tags, Goal, Scheme, Ranked) :-
 	findall(C-Tag, (member(_-Tag, Tags),
-			call(Goal, Tag, C)
+			call(Goal, Tag, C),
+			once(rdf(C, skos:inScheme, Scheme))
 		       ),
 		Pairs0),
 	keysort(Pairs0, Pairs),
@@ -778,13 +780,18 @@ tag_concept(exact, Tag, Concept) :-
 	rdf_has(Concept, rdfs:label, literal(exact(Tag),_)),
 	rdf(Concept, skos:inScheme, gtaa:'GTAA').
 
-tag_concept(stem, Tag, Concept) :-
+tag_concept(Stem, Tag, Concept) :-
+	(   Stem = stem(Diff)
+	->  true
+	;   Stem = stem
+	->  Diff = 0.9
+	),
 	snowball(dutch, Tag, TagStem),
 	rdf_has(Concept,rdfs:label, literal(prefix(TagStem), Lit)),
 	rdf(Concept, skos:inScheme, gtaa:'GTAA'),
 	literal_text(Lit, Label),
 	isub(TagStem, Label, true, Sim),
-	Sim > 0.8.
+	Sim > Diff.
 
 tag_concept(sub, Tag, Concept) :-
 	length(Results, 3),
@@ -848,7 +855,7 @@ tree_related(C, Concept) :-
 
 tree(C1, C2) :-
 	(   rdf_reachable(C1, skos:broader, C2)
-	;   rdf_reachable(C2, skos:broader, C1)
+	%;   rdf_reachable(C2, skos:broader, C1)
 	).
 
 sibbling(C1, C2) :-
@@ -867,16 +874,41 @@ baseline_eval([C|Cs], Video, Scheme, [C|Rest]) :-
 baseline_eval([_|Cs], Video, Scheme, Rest) :-
 	baseline_eval(Cs, Video, Scheme, Rest).
 
-eval_table(Type) :-
+stats_table(Type) :-
 	scheme_alias(Type, Scheme),
+	format('video\tt\tt_st\tc_ex\tc_st\tc_st_r\tbase~n',[]),
 	(   baseline_video(V),
 	    rdf(V, dc:id, literal(Id)),
+	    video_concepts(V, Scheme, Concepts),
 	    findall(1-T, video_tag(V, literal(T)), Tags),
 	    remove_stop_words(Tags, dutch, Tags1),
-	    findall(C, (rdf(V, dc:subject, C),
-			rdf(C, skos:inScheme, Scheme)
-		       ), Concepts),
+	    derived_concepts(Tags1, tag_concept(exact), Scheme, I_ExactMatch),
+	    derived_concepts(Tags1, tag_concept(stem), Scheme, I_StemMatch),
+	    derived_concepts(Tags1, tag_tree_and_related_concept(stem), Scheme, I_StemTR),
+	    length(Concepts, ConceptCount),
 	    length(Tags, TagCount),
+	    length(Tags1, TagFilteredCount),
+	    length(I_ExactMatch, ExactCount),
+	    length(I_StemMatch, StemCount),
+	    length(I_StemTR, StemTRCount),
+	    format('~w,\t~w,\t~w,\t~w,\t~w,\t~w,\t~w\t', [Id, TagCount, TagFilteredCount, ExactCount, StemCount, StemTRCount, ConceptCount]),
+	    concept_eval(V, Tags1, Scheme, tag_concept(exact)),
+	    concept_eval(V, Tags1, Scheme, tag_concept(stem)),
+	    concept_eval(V, Tags1, Scheme, tag_tree_and_related_concept(stem)),
+
+	    format('~n'),
+	    fail
+	;   true
+	).
+
+eval_table(Type) :-
+	scheme_alias(Type, Scheme),
+	(   baseline_video(V),
+	    rdf(V, dc:id, literal(Id)),
+	    findall(1-T, video_tag(V, literal(T)), Tags),
+	    remove_stop_words(Tags, dutch, Tags1),
+	    video_concepts(V, Scheme, Concepts),
+	    length(Tags1, TagCount),
 	    length(Concepts, ConceptCount),
 	    format('~w, ~w\t ~w,   ', [Id, TagCount, ConceptCount]),
 	    concept_eval(V, Tags1, Scheme, tag_concept(exact)),
@@ -902,25 +934,46 @@ eval_table(Type) :-
 	;   true
 	).
 
-topN_eval(Id, N, Type, Goal, TopN) :-
+topN_eval_table(Type, N) :-
 	scheme_alias(Type, Scheme),
-	rdf(V, dc:id, literal(Id)),
-	ranked_concepts(Id, Goal, Ranked),
-	length(Ranked, Count),
-	(   Count < N
-	->  Results0 = Ranked
-	;   length(Results0, N),
-	    append(Results0, _, Ranked)
-	),
-	pairs_values(Results0, Results),
-	baseline_eval(Results, V, Scheme, InTopN),
-	length(InTopN, TopN).
+	(   baseline_video(V),
+	    rdf(V, dc:id, literal(Id)),
+	    video_concepts(V, Scheme, Concepts),
+	    video_tags(V, Tags),
+	    remove_stop_words(Tags, dutch, Tags1),
+
+	    length(Concepts, ConceptCount),
+	    length(Tags, TagCount),
+	    format('~w,\t~w,\t~w,  ', [Id, TagCount, ConceptCount]),
+
+
+	    topN_concept_eval(V, Tags1, Scheme, N, tag_concept(exact)),
+	    topN_concept_eval(V, Tags1, Scheme, N, tag_related_concept(exact)),
+	    topN_concept_eval(V, Tags1, Scheme, N, tag_tree_concept(exact)),
+	    topN_concept_eval(V, Tags1, Scheme, N, tag_tree_related_concept(exact)),
+	    topN_concept_eval(V, Tags1, Scheme, N, tag_tree_and_related_concept(exact)),
+	    topN_concept_eval(V, Tags1, Scheme, N, tag_tree_related_sibbling_concept(exact)),
+	    format('  '),
+	    topN_concept_eval(V, Tags1, Scheme, N, tag_concept(stem)),
+	    topN_concept_eval(V, Tags1, Scheme, N, tag_related_concept(stem)),
+	    topN_concept_eval(V, Tags1, Scheme, N, tag_tree_concept(stem)),
+	    topN_concept_eval(V, Tags1, Scheme, N, tag_tree_and_related_concept(stem)),
+	    topN_concept_eval(V, Tags1, Scheme, N, tag_tree_related_concept(stem)),
+	    topN_concept_eval(V, Tags1, Scheme, N, tag_tree_related_sibbling_concept(stem)),
+	    format('~n'),
+	    fail
+	;   true
+	).
+
+video_tags(Video, Tags) :-
+	findall(1-T, video_tag(Video, literal(T)), Tags).
+
+video_concepts(Video, Scheme, Concepts) :-
+	findall(C, (rdf(Video, dc:subject, C),
+		    rdf(C, skos:inScheme, Scheme)
+		   ), Concepts0),
+	sort(Concepts0, Concepts).
 
-ranked_concepts(Id, Goal, Derived) :-
-	rdf(V, dc:id, literal(Id)),
-	findall(1-T, video_tag(V, literal(T)), Tags),
-	remove_stop_words(Tags, dutch, Tags1),
-	derived_ranked_concepts(Tags1, Goal, Derived).
 
 scheme_alias(all, Scheme) :-
 	rdf_equal(Scheme, gtaa:'GTAA').
@@ -935,12 +988,29 @@ scheme_alias(names, Scheme) :-
 
 
 concept_eval(V, Tags, Scheme, Goal) :-
-	    derived_concepts(Tags, Goal, Derived),
-	    baseline_eval(Derived,V,Scheme,Eval),
-	    length(Derived,DerivedCount),
-	    length(Eval,EvalCount),
-	    %format('~w, ', [EvalCount]).
-	    format('~w, ~w, ', [EvalCount, DerivedCount]).
+	derived_concepts(Tags, Goal, Scheme, Derived),
+	baseline_eval(Derived,V,Scheme,Eval),
+	%length(Derived,DerivedCount),
+	length(Eval,EvalCount),
+%format('~w, ', [EvalCount]).
+	format('~w,\t', [EvalCount]).
+
+
+topN_concept_eval(V, Tags, Scheme, N, Goal) :-
+	derived_ranked_concepts(Tags, Goal, Scheme, Ranked),
+	length(Ranked, Count),
+	(   Count < N
+	->  Top = Ranked
+	;   length(Top, N),
+	    append(Top, _, Ranked)
+	),
+	pairs_values(Ranked, Ranked1),
+	pairs_values(Top, Top1),
+	baseline_eval(Ranked1, V, Scheme, Eval),
+	baseline_eval(Top1, V, Scheme, InTopN),
+	length(Eval, EvalN),
+	length(InTopN, TopN),
+	format('~w, ~w,  ', [EvalN, TopN]).
 
 
 
@@ -953,3 +1023,32 @@ gtaa_wn_hierarchy(GTAA, GTAA_Parent) :-
 	rdf(GTAA_Parent, skos:exactMatch, WN_Parent).
 	%rdf_assert(GTAA, skos:broader, GTAA_Parent, gtaa_wordnet_broader).
 
+
+
+list_concepts(Id, Type, Goal) :-
+	scheme_alias(Type, Scheme),
+	rdf(V, dc:id, literal(Id)),
+	video_concepts(V, Scheme, Concepts),
+	format('prof. annotations~n'),
+	(   member(C, Concepts),
+	    display_label(C, L),
+	    format('~w~n', [L]),
+	    fail
+	;   true
+	),
+	format('~n derived concepts~n'),
+	video_tags(V, Tags),
+	remove_stop_words(Tags, dutch, Tags1),
+	derived_concepts(Tags1, Goal, Scheme, Derived),
+	(   member(C, Derived),
+	    display_label(C, L),
+	    format('~w ~n', [L]),
+	    fail
+	;   true
+	),
+	format('~n User tags~n'),
+	(   member(_-T, Tags1),
+	    format('~w ~n', [T]),
+	    fail
+	;   true
+	).
diff --git a/lib/beng_xml.pl b/lib/beng_xml.pl
index fd12965..e41e14e 100644
--- a/lib/beng_xml.pl
+++ b/lib/beng_xml.pl
@@ -2,6 +2,7 @@
 	  [overlap/1,
 	   beng_stats/0,
 	   import_fragments/0,
+	   assert_fragment_annotations/0,
 	   pprime_frags_by_year/1
 	  ]).
 
@@ -23,6 +24,12 @@ reeks('357006').
 reeks('357007').
 
 
+assert_fragment_annotations :-
+	import_fragments,
+	overlap(Pairs),
+	segment_terms(Pairs, Annotations),
+	assert_fragment_annotations(Annotations).
+
 assert_fragment_annotations(As) :-
 	rdf_equal(gtaa:'OnderwerpenBenG', Onderwerpen),
 	rdf_equal(gtaa:'Persoonsnamen', Persons),