vumix/commit
Fix idf score (count each document at most once per term)
| field | value | date |
|---|---|---|
| author | Michiel Hildebrand | Fri May 18 18:09:36 2012 +0200 |
| committer | Michiel Hildebrand | Fri May 18 18:09:36 2012 +0200 |
| commit | e228ed41f4ff18131555560d96b1dd82f9730ad4 | |
| tree | 21f9501617d418b0aafd341f51fb7782de95cc83 | |
| parent | 8898bc8c7fb378c7314ee2f71d0cb0845915c343 | |
Diff style: patch stat
diff --git a/lib/tfidf.pl b/lib/tfidf.pl index ed10ef5..27fb2d5 100644 --- a/lib/tfidf.pl +++ b/lib/tfidf.pl @@ -9,6 +9,7 @@ :- use_module(library(csv)). :- use_module(library(semweb/rdf_db)). +:- use_module(library(stop_words)). :- dynamic tag_rank_cache/2. @@ -22,7 +23,8 @@ tag_rank(Video, RankedTagList) :- tag_rank(Video, RankedTagList) :- documents(Videos), findall(T, document_term(Video, T), Ts0), - sort(Ts0, Ts), + sort(Ts0, Ts1), + remove_stop_words(Ts1, dutch, Ts), maplist(tag_score(Video, Videos), Ts, Scored), keysort(Scored, Sorted), reverse(Sorted, RankedTagList), @@ -54,7 +56,7 @@ idf(T, Collection, IDF) :- length(Collection, CollectionSize), findall(D, ( member(D, Collection), - document_term(T, D) + once(document_term(T, D)) ), DT), length(DT, DTCount), @@ -62,9 +64,6 @@ idf(T, Collection, IDF) :- - - - write_csv(File) :- documents(Videos), video_tags(Videos, Rows),