amalgame/commit
ADDED: snowball selecter module
author | Jacco van Ossenbruggen |
---|---|
Mon Sep 8 12:22:35 2014 +0200 | |
committer | Jacco van Ossenbruggen |
Mon Sep 8 12:22:35 2014 +0200 | |
commit | 20cd7fe42b1ce31d84ead872a60c26166a9019bf |
tree | 8f8aba864954d63647353a12066170b9effe143e |
parent | 61a373eb7c839933519e1f895e1fd6bf3ce73b22 |
Diff style: patch stat
diff --git a/config-available/ag_modules.pl b/config-available/ag_modules.pl
index 6229570..4e0c03b 100644
--- a/config-available/ag_modules.pl
+++ b/config-available/ag_modules.pl
@@ -16,6 +16,7 @@
 :- use_module(library(ag_modules/exact_label_selecter)).
 :- use_module(library(ag_modules/isub_selecter)).
 :- use_module(library(ag_modules/related_selecter)).
+:- use_module(library(ag_modules/snowball_label_selecter)).
 
 % Vocabulary filters/selecters
 :- use_module(library(ag_modules/voc_exclude)).
diff --git a/lib/ag_modules/snowball_label_selecter.pl b/lib/ag_modules/snowball_label_selecter.pl
new file mode 100644
index 0000000..324b505
--- /dev/null
+++ b/lib/ag_modules/snowball_label_selecter.pl
@@ -0,0 +1,76 @@
+:- module(snowball_label_selecter,
+          []).
+
+/** <module> Selecter on snowball-stemmed label matches
+
+Registers amalgame:'SnowballLabelSelecter' and its parameters.
+selecter/5 does no matching itself: it calls label_selecter/6 with
+snowball_label_match as the first argument.
+*/
+
+:- use_module(library(semweb/rdf_db)).
+:- use_module(library(snowball)).
+:- use_module(string_match_util).
+:- use_module(snowball_label_match).
+:- use_module(label_selecter).
+
+% The export list is empty; public/1 tells SWI-Prolog tooling that these
+% predicates may be called even though nothing in this file calls them.
+:- public amalgame_module/1.
+:- public parameter/4.
+:- public selecter/5.
+
+%!  amalgame_module(Class)
+%
+%   Class is the RDF class declared for this module in ag_modules.ttl.
+
+amalgame_module(amalgame:'SnowballLabelSelecter').
+
+%!  parameter(Name, Type, Default, Description)
+%
+%   One clause per option of this module. Judging from the clauses,
+%   the arguments are the option name, its type, its default value
+%   and a help text. label_list/1 and strategy_languages/2 are not
+%   defined in this file; presumably they come from one of the
+%   modules loaded above -- verify.
+
+parameter(type,
+          oneof([source,target, all]), all,
+          'Select all exact label matches or pick best source/target to disambiguate').
+
+parameter(sourcelabel, oneof(LabelProps), Default,
+          '(Super)Property to get label of the source by') :-
+    rdf_equal(Default, rdfs:label),
+    label_list(LabelProps).
+parameter(targetlabel, oneof(LabelProps), Default,
+          '(Super)Property to get the label of the target by') :-
+    rdf_equal(Default, rdfs:label),
+    label_list(LabelProps).
+parameter(source_language, oneof(['any'|L]), 'any', 'Language of source label') :-
+    strategy_languages(_,L).
+parameter(matchacross_lang, boolean, true,
+          'Allow labels from different language to be matched').
+parameter(snowball_language, oneof(Languages), english,
+          'Language to use for stemmer') :-
+    findall(Alg, snowball_current_algorithm(Alg), Languages).
+parameter(prefix, integer, 4,
+          'Optimise performance by first generating candidates by matching the prefix. Input is an integer for the prefix length.').
+parameter(edit_distance, integer, 0,
+          'When >0 allow additional differences between labels').
+parameter(match_qualified_only, boolean, false,
+          'Match only on the fully qualified label').
+
+
+%!  selecter(In, Sel, Dis, Und, Options)
+%
+%   Passes all five arguments unchanged to label_selecter/6, with
+%   snowball_label_match prepended. In/Sel/Dis/Und are presumably the
+%   input, selected, discarded and undecided mappings -- confirm in
+%   label_selecter.
+
+selecter(In, Sel, Dis, Und, Options) :-
+    label_selecter(snowball_label_match, In, Sel, Dis, Und, Options).
+
+
+
+
diff --git a/rdf/tool/ag_modules.ttl b/rdf/tool/ag_modules.ttl
index 31a306a..04f193c 100644
--- a/rdf/tool/ag_modules.ttl
+++ b/rdf/tool/ag_modules.ttl
@@ -78,6 +78,11 @@ amalgame:RelatedSelecter
     skos:definition "Select mappings with the most mapped related concepts, discard others for the same source/target. If type=all, all correspondences with one or more related concepts are selected."@en ;
     rdfs:subClassOf amalgame:MappingPartitioner .
 
+amalgame:SnowballLabelSelecter
+    rdfs:label "partition/label/stemmed"@en ;
+    skos:definition "Select mappings with the most matching labels after (snowball) stemming, discard others for the same source/target. If type=all, all candidates with matching labels are selected."@en ;
+    rdfs:subClassOf amalgame:MappingPartitioner .
+
 ######## Vocabulary partitioner classes:
 
 amalgame:VocExclude