vumix/commit
dutch stop words from snowball
author | Michiel Hildebrand |
---|---|
Mon Apr 23 15:03:34 2012 +0200 | |
committer | Michiel Hildebrand |
Mon Apr 23 15:03:34 2012 +0200 | |
commit | e7e2d4017c35f2dc47a92ab2ce37ae446ccf5d1d |
tree | bb182a35fbea2d8652e30de0bbe3e6e15938ea47 |
parent | 697d20b2b0a245e51de38e69923e63c9111fcfb9 |
Diff style: patch stat
diff --git a/lib/stop_words.pl b/lib/stop_words.pl
new file mode 100644
index 0000000..48834d2
--- /dev/null
+++ b/lib/stop_words.pl
@@ -0,0 +1,116 @@
+:- module(stop_words,
+          [stop_word/2]).  % stop_word(Language, Word): Word is a stop word in Language
+
+:- multifile
+        stop_word/2.  % multifile: clauses for stop_word/2 may also be defined in other files
+
+/* Dutch stop words from the Snowball list: http://snowball.tartarus.org/algorithms/dutch/stop.txt */
+
+stop_word(dutch, de).
+stop_word(dutch, en).
+stop_word(dutch, van).
+stop_word(dutch, ik).
+stop_word(dutch, te).
+stop_word(dutch, dat).
+stop_word(dutch, die).
+stop_word(dutch, in).
+stop_word(dutch, een).
+stop_word(dutch, hij).
+stop_word(dutch, het).
+stop_word(dutch, niet).
+stop_word(dutch, zijn).
+stop_word(dutch, is).
+stop_word(dutch, was).
+stop_word(dutch, op).
+stop_word(dutch, aan).
+stop_word(dutch, met).
+stop_word(dutch, als).
+stop_word(dutch, voor).
+stop_word(dutch, had).
+stop_word(dutch, er).
+stop_word(dutch, maar).
+stop_word(dutch, om).
+stop_word(dutch, hem).
+stop_word(dutch, dan).
+stop_word(dutch, zou).
+stop_word(dutch, of).
+stop_word(dutch, wat).
+stop_word(dutch, mijn).
+stop_word(dutch, men).
+stop_word(dutch, dit).
+stop_word(dutch, zo).
+stop_word(dutch, door).
+stop_word(dutch, over).
+stop_word(dutch, ze).
+stop_word(dutch, zich).
+stop_word(dutch, bij).
+stop_word(dutch, ook).
+stop_word(dutch, tot).
+stop_word(dutch, je).
+stop_word(dutch, mij).
+stop_word(dutch, uit).
+stop_word(dutch, der).
+stop_word(dutch, daar).
+stop_word(dutch, haar).
+stop_word(dutch, naar).
+stop_word(dutch, heb).
+stop_word(dutch, hoe).
+stop_word(dutch, heeft).
+stop_word(dutch, hebben).
+stop_word(dutch, deze).
+stop_word(dutch, u).
+stop_word(dutch, want).
+stop_word(dutch, nog).
+stop_word(dutch, zal).
+stop_word(dutch, me).
+stop_word(dutch, zij).
+stop_word(dutch, nu).
+stop_word(dutch, ge).
+stop_word(dutch, geen).
+stop_word(dutch, omdat).
+stop_word(dutch, iets).
+stop_word(dutch, worden).
+stop_word(dutch, toch).
+stop_word(dutch, al).
+stop_word(dutch, waren).
+stop_word(dutch, veel).
+stop_word(dutch, meer).
+stop_word(dutch, doen).
+stop_word(dutch, toen).
+stop_word(dutch, moet).
+stop_word(dutch, ben).
+stop_word(dutch, zonder).
+stop_word(dutch, kan).
+stop_word(dutch, hun).
+stop_word(dutch, dus).
+stop_word(dutch, alles).
+stop_word(dutch, onder).
+stop_word(dutch, ja).
+stop_word(dutch, eens).
+stop_word(dutch, hier).
+stop_word(dutch, wie).
+stop_word(dutch, werd).
+stop_word(dutch, altijd).
+stop_word(dutch, doch).
+stop_word(dutch, wordt).
+stop_word(dutch, wezen).
+stop_word(dutch, kunnen).
+stop_word(dutch, ons).
+stop_word(dutch, zelf).
+stop_word(dutch, tegen).
+stop_word(dutch, na).
+stop_word(dutch, reeds).
+stop_word(dutch, wil).
+stop_word(dutch, kon).
+stop_word(dutch, niets).
+stop_word(dutch, uw).
+stop_word(dutch, iemand).
+stop_word(dutch, geweest).
+stop_word(dutch, andere).
+
+
+% Man bijt hond specific: the full phrase plus each of its words, added on top of the Snowball list
+stop_word(dutch, 'man bijt hond').
+stop_word(dutch, man).
+stop_word(dutch, bijt).
+stop_word(dutch, hond).