amalgame/commit
ADDED: isub partitioner, refactorized exact/isub modules
author | Jacco van Ossenbruggen | Sat Sep 6 16:53:17 2014 +0200 |
---|---|---|
committer | Jacco van Ossenbruggen | Sat Sep 6 16:53:17 2014 +0200 |
commit | 3940c0f502c25dd9eab9f5a80d6531cf648f6c7a | |
tree | 79a9654bb57190ed9dd73b7ad16bbbab2fc71146 | |
parent | 7d710a9010a2aef6ccd0460ec5fc19419cb3c2bd | |
Diff style: patch stat
diff --git a/config-available/ag_modules.pl b/config-available/ag_modules.pl index 06a01e7..7558085 100644 --- a/config-available/ag_modules.pl +++ b/config-available/ag_modules.pl @@ -4,11 +4,15 @@ :- use_module(library(ag_modules/ancestor_generator)). :- use_module(library(ag_modules/descendent_generator)). :- use_module(library(ag_modules/exact_label_generator)). +:- use_module(library(ag_modules/isub_generator)). +:- use_module(library(ag_modules/related_generator)). % Mapping producing partitioners: :- use_module(library(ag_modules/ancestor_selecter)). :- use_module(library(ag_modules/descendent_selecter)). :- use_module(library(ag_modules/exact_label_selecter)). +:- use_module(library(ag_modules/isub_selecter)). +:- use_module(library(ag_modules/related_selecter)). % Vocabulary filters/selecters :- use_module(library(ag_modules/voc_exclude)). @@ -19,8 +23,6 @@ % Modules that can be used as matchers :- use_module(library(ag_modules/compound_match)). :- use_module(library(ag_modules/snowball_match)). -:- use_module(library(ag_modules/isub_match)). -:- use_module(library(ag_modules/related_match)). :- use_module(library(ag_modules/preloaded_mapping)). % Alignment filters/selecters @@ -31,7 +33,6 @@ :- use_module(library(ag_modules/most_methods)). :- use_module(library(ag_modules/most_labels)). :- use_module(library(ag_modules/most_generic)). -:- use_module(library(ag_modules/related_selecter)). :- use_module(library(ag_modules/sibling_selecter)). :- use_module(library(ag_modules/preloaded_selecter)). diff --git a/lib/ag_modules/exact_label_selecter.pl b/lib/ag_modules/exact_label_selecter.pl index 7158fdb..22a323d 100644 --- a/lib/ag_modules/exact_label_selecter.pl +++ b/lib/ag_modules/exact_label_selecter.pl @@ -5,8 +5,6 @@ :- public selecter/5. :- public parameter/4. -:- use_module(library(sort)). -:- use_module(library(amalgame/map)). :- use_module(label_selecter). :- use_module(exact_label_match). :- use_module(string_match_util). 
@@ -38,12 +36,4 @@ parameter(match_qualified_only, boolean, false, amalgame_module(amalgame:'ExactLabelSelecter'). selecter(In, Sel, Dis, Und, Options) :- - option(type(SourceOrTarget), Options, all), - ( SourceOrTarget \= source - -> label_selecter(SourceOrTarget, exact_label_match, In, Sel, Dis, Und, Options) - ; predsort(ag_map:compare_align(target), In, InT), - label_selecter(SourceOrTarget, exact_label_match, InT, Sel0, Dis0, Und0, Options), - predsort(ag_map:compare_align(source), Sel0, Sel), - predsort(ag_map:compare_align(source), Dis0, Dis), - predsort(ag_map:compare_align(source), Und0, Und) - ). + label_selecter(exact_label_match, In, Sel, Dis, Und, Options). diff --git a/lib/ag_modules/isub_generator.pl b/lib/ag_modules/isub_generator.pl new file mode 100644 index 0000000..67dddba --- /dev/null +++ b/lib/ag_modules/isub_generator.pl @@ -0,0 +1,45 @@ +:- module(isub_generator, + []). + +:- use_module(library(amalgame/vocabulary)). +:- use_module(isub_match). +:- use_module(string_match_util). + +:- public matcher/4. +:- public parameter/4. +:- public amalgame_module/1. + +amalgame_module(amalgame:'IsubMatcher'). + +parameter(sourcelabel, oneof(LabelProps), Default, + '(Super)Property to get label of the source by') :- + rdf_equal(Default, rdfs:label), + label_list(LabelProps). +parameter(targetlabel, oneof(LabelProps), Default, + '(Super)Property to get the label of the target by') :- + rdf_equal(Default, rdfs:label), + label_list(LabelProps). +parameter(threshold, float, 0.7, + 'threshold edit distance'). +parameter(language, oneof(['any'|L]), 'any', 'Language of source label') :- + strategy_languages(_,L). +parameter(matchacross_lang, + boolean, true, + 'Allow labels from different language to be matched'). +parameter(normalize, + boolean, false, + '(Case) normalize strings as described in the isub article'). + +%% matcher(+Source, +Target, -Mappings, +Options) +% +% Mappings is a list of matches between instances of Source and +% Target. 
+ +matcher(Source, Target, Mappings, Options) :- + findall(M, align(Source, Target, M, Options), Mappings0), + sort(Mappings0, Mappings). + +align(Source, Target, Match, Options) :- + vocab_member(S, Source), + vocab_member(T, Target), + isub_match(align(S,T,[]), Match, Options). diff --git a/lib/ag_modules/isub_match.pl b/lib/ag_modules/isub_match.pl index 6c8a33c..bb0dae6 100644 --- a/lib/ag_modules/isub_match.pl +++ b/lib/ag_modules/isub_match.pl @@ -1,72 +1,12 @@ :- module(isub_match, - []). + [isub_match/3]). +:- use_module(library(option)). :- use_module(library(semweb/rdf_db)). :- use_module(library(semweb/rdf_label)). :- use_module(library(isub)). -:- use_module(library(amalgame/vocabulary)). -:- use_module(string_match_util). -:- public filter/3. -:- public matcher/4. -:- public parameter/4. -:- public amalgame_module/1. - -amalgame_module(amalgame:'IsubMatcher'). -amalgame_module(amalgame:'IsubFilter'). - -parameter(sourcelabel, oneof(LabelProps), Default, - '(Super)Property to get label of the source by') :- - rdf_equal(Default, rdfs:label), - label_list(LabelProps). -parameter(targetlabel, oneof(LabelProps), Default, - '(Super)Property to get the label of the target by') :- - rdf_equal(Default, rdfs:label), - label_list(LabelProps). -parameter(threshold, float, 0.7, - 'threshold edit distance'). -parameter(language, oneof(['any'|L]), 'any', 'Language of source label') :- - strategy_languages(_,L). -parameter(matchacross_lang, - boolean, true, - 'Allow labels from different language to be matched'). -parameter(normalize, - boolean, false, - '(Case) normalize strings as described in the isub article'). - -%% filter(+MappingsIn, -MappingsOut, +Options) -% -% Filter mappings based on exact matching of labels. - -filter([], [], _). -filter([align(S,T,P)|Cs], [C|Mappings], Options) :- - ( T = scheme(_) - -> vocab_member(T2, T), - match(align(S,T2,P), C, Options) - ; match(align(S,T,P), C, Options) - ), - !, - filter(Cs, Mappings, Options). 
-filter([_|Cs], Mappings, Options) :- - filter(Cs, Mappings, Options). - - -%% matcher(+Source, +Target, -Mappings, +Options) -% -% Mappings is a list of matches between instances of Source and -% Target. - -matcher(Source, Target, Mappings, Options) :- - findall(M, align(Source, Target, M, Options), Mappings0), - sort(Mappings0, Mappings). - -align(Source, Target, Match, Options) :- - vocab_member(S, Source), - vocab_member(T, Target), - match(align(S,T,[]), Match, Options). - - -match(align(Source, Target, Prov0), align(Source, Target, [Prov|Prov0]), Options) :- +isub_match(align(Source, Target, Prov0), align(Source, Target, [Prov|Prov0]), Options) :- rdf_equal(skos:definition, DefaultProp), option(threshold(Threshold), Options, 0.0), option(sourcelabel(MatchProp1), Options, DefaultProp), diff --git a/lib/ag_modules/isub_selecter.pl b/lib/ag_modules/isub_selecter.pl new file mode 100644 index 0000000..18c47e7 --- /dev/null +++ b/lib/ag_modules/isub_selecter.pl @@ -0,0 +1,38 @@ +:- module(isub_selecter, + []). + +:- public amalgame_module/1. +:- public selecter/5. +:- public parameter/4. + +:- use_module(label_selecter). +:- use_module(string_match_util). +:- use_module(isub_match). + +amalgame_module(amalgame:'IsubSelecter'). + +parameter(type, + oneof([source,target, all]), all, + 'Select all exact label matches or pick best source/target to disambiguate'). + +parameter(sourcelabel, oneof(LabelProps), Default, + '(Super)Property to get label of the source by') :- + rdf_equal(Default, rdfs:label), + label_list(LabelProps). +parameter(targetlabel, oneof(LabelProps), Default, + '(Super)Property to get the label of the target by') :- + rdf_equal(Default, rdfs:label), + label_list(LabelProps). +parameter(threshold, float, 0.7, + 'threshold edit distance'). +parameter(language, oneof(['any'|L]), 'any', 'Language of source label') :- + strategy_languages(_,L). +parameter(matchacross_lang, + boolean, true, + 'Allow labels from different language to be matched'). 
+parameter(normalize, + boolean, false, + '(Case) normalize strings as described in the isub article'). + +selecter(In, Sel, Dis, Und, Options) :- + label_selecter(isub_match, In, Sel, Dis, Und, Options). diff --git a/lib/ag_modules/label_selecter.pl b/lib/ag_modules/label_selecter.pl index 1a732f3..422a35c 100644 --- a/lib/ag_modules/label_selecter.pl +++ b/lib/ag_modules/label_selecter.pl @@ -1,12 +1,23 @@ :- module(label_selecter, - [ label_selecter/7 + [ label_selecter/6 ]). /* This module provides a meta predicate label_selecter/7, which implements the selecter/5 predicate of the * label matching partitioners. */ -:- meta_predicate label_selecter(+, 3, +, -, -, -, +). +:- meta_predicate label_selecter(3, +, -, -, -, +). + +label_selecter(Matcher, In, Sel, Dis, Und, Options) :- + option(type(SourceOrTarget), Options, all), + ( SourceOrTarget \= source + -> label_selecter(SourceOrTarget, Matcher, In, Sel, Dis, Und, Options) + ; predsort(ag_map:compare_align(target), In, InT), + label_selecter(SourceOrTarget, Matcher, InT, Sel0, Dis0, Und0, Options), + predsort(ag_map:compare_align(source), Sel0, Sel), + predsort(ag_map:compare_align(source), Dis0, Dis), + predsort(ag_map:compare_align(source), Und0, Und) + ). label_selecter( _, _, [], [], [], [], _). label_selecter(all, Matcher, [Head|Tail], Sel, Dis, [], Options) :- @@ -14,6 +25,9 @@ label_selecter(all, Matcher, [Head|Tail], Sel, Dis, [], Options) :- -> Sel = [Match|TSel], Dis = TDis ; Sel = TSel, - Dis = [Head|TDis] + Head = align(S,T,P), + MisEv = [method(Matcher), score([result(discarded)])], + MisMatch = align(S,T, [MisEv|P]), + Dis = [MisMatch|TDis] ), label_selecter(all, Matcher, Tail, TSel, TDis, [], Options). 
diff --git a/rdf/tool/ag_modules.ttl b/rdf/tool/ag_modules.ttl index 54b7f47..4c37366 100644 --- a/rdf/tool/ag_modules.ttl +++ b/rdf/tool/ag_modules.ttl @@ -22,6 +22,11 @@ amalgame:ExactLabelMatcher skos:definition "Generate new candidates based on exact matching labels of source and target concepts."@en ; rdfs:subClassOf amalgame:CandidateGenerator . +amalgame:IsubMatcher + rdfs:label "generate/label/similarity"@en ; + skos:definition "Generate new candidates based on similar labels. The matcher is based on the 'isub' metric introduced in 'A string metric for ontology alignment' by Giorgos Stoilos, 2005."@en ; + rdfs:subClassOf amalgame:CandidateGenerator . + amalgame:RelatedMatcher amalgame:need_secondary_inputs true ; rdfs:label "generate/structure/related"@en ; @@ -47,6 +52,11 @@ amalgame:ExactLabelSelecter skos:definition "Select mappings with the most matching labels, discard others for the same source/target. If type=all, all candidates with matching labels are selected."@en ; rdfs:subClassOf amalgame:MappingPartitioner . +amalgame:IsubSelecter + rdfs:label "partition/label/similarity"@en ; + skos:definition "Select mappings with the most similar labels, discard others for the same source/target. If type=all, all candidates with sufficiently similar labels are selected. The matcher is based on the 'isub' metric introduced in 'A string metric for ontology alignment' by Giorgos Stoilos, 2005."@en ; + rdfs:subClassOf amalgame:MappingPartitioner . + amalgame:RelatedSelecter amalgame:need_secondary_inputs true ; rdfs:label "partition/structure/related"@en ; @@ -103,11 +113,6 @@ amalgame:SnowballMatcher skos:definition "A label matcher with similarity based on (snowball) stemming."@en ; rdfs:subClassOf amalgame:Matcher . 
-amalgame:IsubMatcher - rdfs:label "string/similarity"@en ; - skos:definition "A string similarity matcher based on the 'isub' metric introduced in 'A string metric for ontology alignment' by Giorgos Stoilos, 2005."@en ; - rdfs:subClassOf amalgame:Matcher . - amalgame:AritySelect rdfs:label "ambiguity/remove"@en ; skos:definition "Select correspondences with a unique source, target or both, discard others"@en;