amalgame/commit

ADDED: snowball selecter module

author Jacco van Ossenbruggen
Mon Sep 8 12:22:35 2014 +0200
committer Jacco van Ossenbruggen
Mon Sep 8 12:22:35 2014 +0200
commit 20cd7fe42b1ce31d84ead872a60c26166a9019bf
tree 8f8aba864954d63647353a12066170b9effe143e
parent 61a373eb7c839933519e1f895e1fd6bf3ce73b22
Diff style: patch stat
diff --git a/config-available/ag_modules.pl b/config-available/ag_modules.pl
index 6229570..4e0c03b 100644
--- a/config-available/ag_modules.pl
+++ b/config-available/ag_modules.pl
@@ -16,6 +16,7 @@
 :- use_module(library(ag_modules/exact_label_selecter)).
 :- use_module(library(ag_modules/isub_selecter)).
 :- use_module(library(ag_modules/related_selecter)).
+:- use_module(library(ag_modules/snowball_label_selecter)).
 
 % Vocabulary filters/selecters
 :- use_module(library(ag_modules/voc_exclude)).
diff --git a/lib/ag_modules/snowball_label_selecter.pl b/lib/ag_modules/snowball_label_selecter.pl
new file mode 100644
index 0000000..324b505
--- /dev/null
+++ b/lib/ag_modules/snowball_label_selecter.pl
@@ -0,0 +1,48 @@
+:- module(snowball_label_selecter,
+	  []).	% empty export list: the predicates below are declared public, not exported
+
+:- use_module(library(semweb/rdf_db)).
+:- use_module(library(snowball)).
+:- use_module(string_match_util).
+:- use_module(snowball_label_match).
+:- use_module(label_selecter).
+
+:- public amalgame_module/1.	% public/1: these are called from outside this file
+:- public parameter/4.
+:- public selecter/5.
+
+amalgame_module(amalgame:'SnowballLabelSelecter'). % class declared in rdf/tool/ag_modules.ttl; presumably how the framework finds this module
+
+%!	parameter(?Name, ?Type, ?Default, ?Description) is nondet.
+%	Options of this selecter: name, value type, default and descriptive text.
+parameter(type, oneof([source,target, all]), all,
+	 'Select all stemmed label matches or pick best source/target to disambiguate').
+parameter(sourcelabel, oneof(LabelProps), Default,
+	  '(Super)Property to get label of the source by') :-
+	rdf_equal(Default, rdfs:label),
+	label_list(LabelProps).
+parameter(targetlabel, oneof(LabelProps), Default,
+	  '(Super)Property to get the label of the target by') :-
+	rdf_equal(Default, rdfs:label),
+	label_list(LabelProps).
+parameter(source_language, oneof(['any'|L]), 'any', 'Language of source label') :-
+	strategy_languages(_,L).
+parameter(matchacross_lang, boolean, true,
+	  'Allow labels from different languages to be matched').
+parameter(snowball_language, oneof(Languages), english,
+	  'Language to use for stemmer') :-
+	findall(Alg, snowball_current_algorithm(Alg), Languages). % every algorithm library(snowball) reports
+parameter(prefix, integer, 4,
+	  'Optimise performance by first generating candidates by matching the prefix. Input is an integer for the prefix length.').
+parameter(edit_distance, integer, 0,
+	  'When >0 allow additional differences between labels').
+parameter(match_qualified_only, boolean, false,
+	  'Match only on the fully qualified label').
+
+% Delegates to label_selecter/6 with snowball_label_match as first argument (argument roles inferred from names).
+selecter(Mappings, Selected, Discarded, Undecided, Options) :-
+	label_selecter(snowball_label_match, Mappings, Selected, Discarded, Undecided, Options).
+
+
+
+
diff --git a/rdf/tool/ag_modules.ttl b/rdf/tool/ag_modules.ttl
index 31a306a..04f193c 100644
--- a/rdf/tool/ag_modules.ttl
+++ b/rdf/tool/ag_modules.ttl
@@ -78,6 +78,11 @@ amalgame:RelatedSelecter
     skos:definition "Select mappings with the most mapped related concepts, discard others for the same source/target. If type=all, all correspondences with one or more related concepts are selected."@en ;
     rdfs:subClassOf amalgame:MappingPartitioner .
 
+amalgame:SnowballLabelSelecter
+    rdfs:label "partition/label/stemmed"@en ;
+    skos:definition "Select mappings with the most matching labels after (snowball) stemming, discard others for the same source/target. If type=all, all candidates with matching labels are selected."@en ;
+    rdfs:subClassOf amalgame:MappingPartitioner .
+
 ######## Vocabulary partitioner classes:
 
 amalgame:VocExclude