vumix/commit

experimenting with tfidf

author Michiel Hildebrand
Fri Apr 20 10:41:04 2012 +0200
committer Michiel Hildebrand
Fri Apr 20 10:41:04 2012 +0200
commit 20815df5b4bfa53e87362ad9e3d3db04f4fe81e2
tree 25983384e704eb659b6ae4ae01baeeafe81ce063
parent 7221e32b331e95aa753138435605bef922c059e4
Diff style: patch stat
diff --git a/lib/tfidf.pl b/lib/tfidf.pl
index b558e0a..00d0a21 100644
--- a/lib/tfidf.pl
+++ b/lib/tfidf.pl
@@ -5,6 +5,9 @@
 	   idf/3
 	  ]).
 
+:- use_module(library(csv)).
+:- use_module(library(semweb/rdf_db)).
+
 tag_rank(Video, RankedTagList) :-
 	documents(Videos),
 	findall(T, document_term(Video, T), Ts0),
@@ -24,10 +27,10 @@ documents(Videos) :-
 
 document_term(D, T) :-
 	rdf(D, pprime:hasAnnotation, E),
-	rdf(E, rdf:value, literal(T)),
-	rdf(E, pprime:score, literal(SA)),
-	atom_number(SA, S),
-	S > 5.
+	rdf(E, rdf:value, literal(T)).
+	%rdf(E, pprime:score, literal(SA)),
+	%atom_number(SA, S),
+	%S > 5.
 
 tf(T, D, TF) :-
 	findall(T,
@@ -50,3 +53,17 @@ idf(T, Collection, IDF) :-
 
 
 
+%% write_csv(+File): write the rows video_tags/2 builds for documents/1 to File.
+write_csv(File) :-
+	documents(AllVideos),
+	video_tags(AllVideos, Records),
+	csv_write_file(File, Records).
+
+%% video_tags(+Videos, -Rows): Rows holds one row(Id, Tags) per video; Tags is
+%  the video's document_term/2 values joined by spaces. Needs a dc:id literal.
+video_tags([], []).
+video_tags([Video|Vs], [row(Id,TagA)|Rs]) :-
+	rdf(Video,dc:id,literal(Id)),
+	findall(T, document_term(Video, T), Tags),
+	atomic_list_concat(Tags, ' ', TagA),	% replaces deprecated concat_atom/3
+	video_tags(Vs, Rs).