vumix/commit
experimenting with tfidf
author | Michiel Hildebrand |
---|---|
Fri Apr 20 10:41:04 2012 +0200 | |
committer | Michiel Hildebrand |
Fri Apr 20 10:41:04 2012 +0200 | |
commit | 20815df5b4bfa53e87362ad9e3d3db04f4fe81e2 |
tree | 25983384e704eb659b6ae4ae01baeeafe81ce063 |
parent | 7221e32b331e95aa753138435605bef922c059e4 |
Diff style: patch stat
```diff
diff --git a/lib/tfidf.pl b/lib/tfidf.pl
index b558e0a..00d0a21 100644
--- a/lib/tfidf.pl
+++ b/lib/tfidf.pl
@@ -5,6 +5,9 @@
 	    idf/3
 	  ]).
 
+:- use_module(library(csv)).
+:- use_module(library(semweb/rdf_db)).
+
 tag_rank(Video, RankedTagList) :-
 	documents(Videos),
 	findall(T, document_term(Video, T), Ts0),
@@ -24,10 +27,10 @@ documents(Videos) :-
 
 document_term(D, T) :-
 	rdf(D, pprime:hasAnnotation, E),
-	rdf(E, rdf:value, literal(T)),
-	rdf(E, pprime:score, literal(SA)),
-	atom_number(SA, S),
-	S > 5.
+	rdf(E, rdf:value, literal(T)).
+	%rdf(E, pprime:score, literal(SA)),
+	%atom_number(SA, S),
+	%S > 5.
 
 tf(T, D, TF) :-
 	findall(T,
@@ -50,3 +53,17 @@ idf(T, Collection, IDF) :-
 
 
 
+write_csv(File) :-
+	documents(Videos),
+	video_tags(Videos, Rows),
+	csv_write_file(File, Rows).
+
+
+video_tags([], []).
+video_tags([Video|Vs], [row(Id,TagA)|Rs]) :-
+	rdf(Video,dc:id,literal(Id)),
+	findall(T, document_term(Video, T), Tags),
+	concat_atom(Tags, ' ', TagA),
+	video_tags(Vs, Rs).
+
+
```