amalgame/commit
CHANGED: exact_match split into generator and partitioner
author | Jacco van Ossenbruggen |
---|---|
Sat Sep 6 11:51:20 2014 +0200 | |
committer | Jacco van Ossenbruggen |
Sat Sep 6 11:51:20 2014 +0200 | |
commit | 8fe3ed2c1c9744b67da5e8891f907b836999064b |
tree | 2fe7b1f12c58832b73cb8cec9a5f04661d837cd4 |
parent | f21da2c375f960e188a245f4a2fafe66c7c70aba |
Diff style: patch stat
diff --git a/config-available/ag_modules.pl b/config-available/ag_modules.pl index 82542b2..9fb7528 100644 --- a/config-available/ag_modules.pl +++ b/config-available/ag_modules.pl @@ -1,8 +1,12 @@ :- module(conf_ag_modules, []). +% Candidate correspondence generator components: +:- use_module(library(ag_modules/exact_label_generator)). + +% Mapping producing partitioners: +:- use_module(library(ag_modules/exact_label_selecter)). % Modules that can be used as matchers -:- use_module(library(ag_modules/exact_label_match)). :- use_module(library(ag_modules/compound_match)). :- use_module(library(ag_modules/snowball_match)). :- use_module(library(ag_modules/isub_match)). diff --git a/lib/ag_modules/exact_label_generator.pl b/lib/ag_modules/exact_label_generator.pl new file mode 100644 index 0000000..caa4bb3 --- /dev/null +++ b/lib/ag_modules/exact_label_generator.pl @@ -0,0 +1,63 @@ +:- module(exact_label_generator, []). + +:- use_module(library(semweb/rdf_db)). +:- use_module(library(amalgame/vocabulary)). +:- use_module(exact_label_match). +:- use_module(string_match_util). + +:- public amalgame_module/1. +:- public filter/3. +:- public matcher/4. +:- public parameter/4. + +amalgame_module(amalgame:'ExactLabelMatcher'). +amalgame_module(amalgame:'ExactLabelFilter'). + +parameter(sourcelabel, oneof(LabelProps), Default, + '(Super)Property to get label of the source by') :- + rdf_equal(Default, rdfs:label), + label_list(LabelProps). +parameter(targetlabel, oneof(LabelProps), Default, + '(Super)Property to get the label of the target by') :- + rdf_equal(Default, rdfs:label), + label_list(LabelProps). +parameter(source_language, oneof(['any'|L]), 'any', + 'Language of source label') :- + strategy_languages(_S,L). +parameter(matchacross_lang, boolean, true, + 'Allow labels from different language to be matched'). +parameter(matchacross_type, boolean, true, + 'Allow labels from different types to be matched'). 
+parameter(case_sensitive, boolean, false, + 'When true the case of labels must be equal'). +parameter(match_qualified_only, boolean, false, + 'Match only on the fully qualified label'). + +%% filter(+MappingsIn, -MappingsOut, +Options) +% +% Filter mappings based on exact matching of labels. + +filter([], [], _). +filter([align(S,T,P)|Cs], [C|Mappings], Options) :- + ( T = scheme(TargetScheme) + -> exact_label_match(align(S,_,P), C, [target_scheme(TargetScheme)|Options]) + ; exact_label_match(align(S,T,P), C, Options) + ), + !, + filter(Cs, Mappings, Options). +filter([_|Cs], Mappings, Options) :- + filter(Cs, Mappings, Options). + + +%% matcher(+Source, +Target, -Mappings, +Options) +% +% Mappings is a sorted list of matches between instances of Source +% and Target. + +matcher(Source, Target, Mappings, Options) :- + findall(M, align(Source, Target, M, Options), Mappings0), + sort(Mappings0, Mappings). + +align(Source, Target, Match, Options) :- + vocab_member(S, Source), + exact_label_match(align(S,_,[]), Match, [target_scheme(Target)|Options]). diff --git a/lib/ag_modules/exact_label_match.pl b/lib/ag_modules/exact_label_match.pl index 1a749d8..0a466f3 100644 --- a/lib/ag_modules/exact_label_match.pl +++ b/lib/ag_modules/exact_label_match.pl @@ -1,68 +1,13 @@ -:- module(exact_label_match, - []). +:- module(exact_label_match, [ + exact_label_match/3 + ]). :- use_module(library(semweb/rdf_db)). :- use_module(library(amalgame/vocabulary)). :- use_module(string_match_util). -:- public amalgame_module/1. -:- public filter/3. -:- public matcher/4. -:- public parameter/4. - -amalgame_module(amalgame:'ExactLabelMatcher'). -amalgame_module(amalgame:'ExactLabelFilter'). - -parameter(sourcelabel, oneof(LabelProps), Default, - '(Super)Property to get label of the source by') :- - rdf_equal(Default, rdfs:label), - label_list(LabelProps). 
-parameter(targetlabel, oneof(LabelProps), Default, - '(Super)Property to get the label of the target by') :- - rdf_equal(Default, rdfs:label), - label_list(LabelProps). -parameter(source_language, oneof(['any'|L]), 'any', - 'Language of source label') :- - strategy_languages(_S,L). -parameter(matchacross_lang, boolean, true, - 'Allow labels from different language to be matched'). -parameter(matchacross_type, boolean, true, - 'Allow labels from different types to be matched'). -parameter(case_sensitive, boolean, false, - 'When true the case of labels must be equal'). -parameter(match_qualified_only, boolean, false, - 'Match only on the fully qualified label'). - -%% filter(+MappingsIn, -MappingsOut, +Options) -% -% Filter mappings based on exact matching of labels. - -filter([], [], _). -filter([align(S,T,P)|Cs], [C|Mappings], Options) :- - ( T = scheme(TargetScheme) - -> match(align(S,_,P), C, [target_scheme(TargetScheme)|Options]) - ; match(align(S,T,P), C, Options) - ), - !, - filter(Cs, Mappings, Options). -filter([_|Cs], Mappings, Options) :- - filter(Cs, Mappings, Options). - - -%% matcher(+Source, +Target, -Mappings, +Options) -% -% Mappings is a sorted list of matches between instances of Source -% and Target. - -matcher(Source, Target, Mappings, Options) :- - findall(M, align(Source, Target, M, Options), Mappings0), - sort(Mappings0, Mappings). - -align(Source, Target, Match, Options) :- - vocab_member(S, Source), - match(align(S,_,[]), Match, [target_scheme(Target)|Options]). 
- -match(align(Source, Target, Prov0), align(Source, Target, [Prov|Prov0]), Options) :- +exact_label_match(align(Source, Target, Prov0), + align(Source, Target, [Prov|Prov0]), Options) :- rdf_equal(rdfs:label, RdfsLabel), option(sourcelabel(MatchPropS), Options, RdfsLabel), option(targetlabel(MatchPropT), Options, RdfsLabel), @@ -71,7 +16,7 @@ match(align(Source, Target, Prov0), align(Source, Target, [Prov|Prov0]), Options option(case_sensitive(CaseSensitive), Options, false), option(source_language(Lang), Options, 'any'), ( Lang == 'any' - -> SourceLang = _ + -> SourceLang = _UnBound ; SourceLang = Lang ), diff --git a/lib/ag_modules/exact_label_selecter.pl b/lib/ag_modules/exact_label_selecter.pl new file mode 100644 index 0000000..7158fdb --- /dev/null +++ b/lib/ag_modules/exact_label_selecter.pl @@ -0,0 +1,49 @@ +:- module(exact_label_selecter, + []). + +:- public amalgame_module/1. +:- public selecter/5. +:- public parameter/4. + +:- use_module(library(sort)). +:- use_module(library(amalgame/map)). +:- use_module(label_selecter). +:- use_module(exact_label_match). +:- use_module(string_match_util). + +parameter(type, + oneof([source,target, all]), all, + 'Select all exact label matches or pick best source/target to disambiguate'). + +parameter(sourcelabel, oneof(LabelProps), Default, + '(Super)Property to get label of the source by') :- + rdf_equal(Default, rdfs:label), + label_list(LabelProps). +parameter(targetlabel, oneof(LabelProps), Default, + '(Super)Property to get the label of the target by') :- + rdf_equal(Default, rdfs:label), + label_list(LabelProps). +parameter(source_language, oneof(['any'|L]), 'any', + 'Language of source label') :- + strategy_languages(_S,L). +parameter(matchacross_lang, boolean, true, + 'Allow labels from different language to be matched'). +parameter(matchacross_type, boolean, true, + 'Allow labels from different types to be matched'). +parameter(case_sensitive, boolean, false, + 'When true the case of labels must be equal'). 
+parameter(match_qualified_only, boolean, false, + 'Match only on the fully qualified label'). + +amalgame_module(amalgame:'ExactLabelSelecter'). + +selecter(In, Sel, Dis, Und, Options) :- + option(type(SourceOrTarget), Options, all), + ( SourceOrTarget \= source + -> label_selecter(SourceOrTarget, exact_label_match, In, Sel, Dis, Und, Options) + ; predsort(ag_map:compare_align(target), In, InT), + label_selecter(SourceOrTarget, exact_label_match, InT, Sel0, Dis0, Und0, Options), + predsort(ag_map:compare_align(source), Sel0, Sel), + predsort(ag_map:compare_align(source), Dis0, Dis), + predsort(ag_map:compare_align(source), Und0, Und) + ). diff --git a/lib/ag_modules/label_selecter.pl b/lib/ag_modules/label_selecter.pl new file mode 100644 index 0000000..1a732f3 --- /dev/null +++ b/lib/ag_modules/label_selecter.pl @@ -0,0 +1,19 @@ +:- module(label_selecter, + [ label_selecter/7 + ]). + +/* This module provides a meta predicate label_selecter/7, which implements the selecter/5 predicate of the + * label matching partitioners. + */ + +:- meta_predicate label_selecter(+, 3, +, -, -, -, +). + +label_selecter( _, _, [], [], [], [], _). +label_selecter(all, Matcher, [Head|Tail], Sel, Dis, [], Options) :- + ( call(Matcher, Head, Match, Options) + -> Sel = [Match|TSel], + Dis = TDis + ; Sel = TSel, + Dis = [Head|TDis] + ), + label_selecter(all, Matcher, Tail, TSel, TDis, [], Options). diff --git a/rdf/tool/ag_modules.ttl b/rdf/tool/ag_modules.ttl index 3bae959..e9a2d36 100644 --- a/rdf/tool/ag_modules.ttl +++ b/rdf/tool/ag_modules.ttl @@ -3,6 +3,18 @@ @prefix skos: <http://www.w3.org/2004/02/skos/core#> . @prefix amalgame: <http://purl.org/vocabularies/amalgame#> . +amalgame:ExactLabelMatcher + rdfs:label "generate/label (exact)"@en ; + skos:definition "Generate new candidates based on exact matching labels of source and target concepts"@en ; + rdfs:subClassOf amalgame:CandidateGenerator . 
+ +amalgame:ExactLabelSelecter + rdfs:label "partition/label (exact)"@en ; + skos:definition "Partition existing candidates based on exact matching labels of source and target concepts"@en ; + rdfs:subClassOf amalgame:MappingPartitioner . + +#################### + amalgame:EvaluationProcess rdfs:label "Manual evaluation"@en ; skos:definition "A process class representing manual evaluation processes "@en ; @@ -18,11 +30,6 @@ amalgame:SelectPreLoadedSelecter skos:definition "Select mappings with corresponding mappings in the preloaded mapping, discard others with the same source/target."@en ; rdfs:subClassOf amalgame:MappingPartitioner . -amalgame:ExactLabelMatcher - rdfs:label "string/label (exact)"@en ; - skos:definition "A basic label matcher based on exact matching labels of source and target concepts"@en ; - rdfs:subClassOf amalgame:Matcher . - amalgame:CompoundMatcher rdfs:label "string/label (compound)"@en ; skos:definition "A label matcher matching after compound splitting the label(s) of the source concepts"@en ; diff --git a/rdf/tool/amalgame.ttl b/rdf/tool/amalgame.ttl index 8a2bb2d..5e4bcdf 100644 --- a/rdf/tool/amalgame.ttl +++ b/rdf/tool/amalgame.ttl @@ -11,7 +11,7 @@ amalgame:AlignmentStrategy a prov:Plan ; rdfs:label "Alignment strategy"@en ; - rdfs:comment "RDF representation of an alignment strategy that can be loaded and executed by Amalgame"@en. + rdfs:comment "RDF representation of an alignment strategy that can be loaded into, and executed by, Amalgame"@en. amalgame:Entity rdfs:label "Entity"@en ; @@ -23,18 +23,44 @@ amalgame:Process rdfs:comment "Top class for all amalgame processes"@en ; rdfs:subClassOf prov:Activity . +amalgame:Partitioner + rdfs:label "Partitioner"@en ; + rdfs:comment "A process that partitions an existing Entity into subsets"@en ; + rdfs:subClassOf amalgame:Process . 
+ +amalgame:CandidateGenerator + rdfs:label "Candidate generator"@en ; + rdfs:comment "A process that generates a mapping with candidate correspondences."@en ; + rdfs:subClassOf amalgame:Matcher . # fix me, should become amalgame:Process . + +amalgame:MappingPartitioner + rdfs:label "Mapping partitioner"@en ; + skos:definition "Component that creates subsets from an existing mapping"@en ; + rdfs:subClassOf amalgame:Partitioner . + +amalgame:VocabPartitioner + rdfs:label "Vocabulary partitioner"@en ; + skos:definition "Component that creates subsets from an existing concept scheme"@en ; + rdfs:subClassOf amalgame:Partitioner . + +amalgame:VirtualVocabPartitioner + rdfs:label "Virtual vocabulary partitioner"@en ; + skos:definition "Component that creates virtual subsets from an existing concept scheme"@en ; + skos:note "A virtual scheme is defined intentionally, not materialized in the store. Amalgame can iterate over all its 'inScheme' members and test whether a concept is inScheme or not."@en ; + rdfs:subClassOf amalgame:VocabPartitioner . + +############################### + +amalgame:Matcher + rdfs:label "Matcher (deprecated)"@en ; + skos:definition "Component that creates a mapping given a source and target vocabulary"@en ; + rdfs:subClassOf amalgame:Process . + amalgame:parameters a rdf:Property ; rdfs:domain amalgame:Process ; rdfs:range rdfs:Literal . -amalgame:Matcher - rdfs:label "Matcher"@en ; - skos:definition "Component that creates a mapping given a source and target vocabulary"@en ; - rdfs:subClassOf amalgame:Process . -amalgame:Partitioner - rdfs:label "Partitioner"@en ; - rdfs:subClassOf amalgame:Process . 
amalgame:MultiInputComponent rdfs:label "Multi-input operator"@en ; skos:definition "Component with multiple first-class inputs"@en ; @@ -43,16 +69,6 @@ amalgame:SetOperator rdfs:label "Set operator"@en ; skos:definition "Component that creates a mapping by combining the correspondences from a number of mappings"@en ; rdfs:subClassOf amalgame:MultiInputComponent . -amalgame:MappingPartitioner - rdfs:label "Mapping Partitioner"@en ; - skos:definition "Component that creates subsets from an existing mapping"@en ; - rdfs:subClassOf amalgame:Partitioner . -amalgame:VocabPartitioner - rdfs:label "Vocabulary Partitioner"@en ; - skos:definition "Component that creates subsets from an existing concept scheme"@en ; - rdfs:subClassOf amalgame:Partitioner . -amalgame:VirtualVocabPartitioner - rdfs:subClassOf amalgame:VocabPartitioner . amalgame:Merger rdfs:label "merger"@en ; skos:definition "Component that creates a mapping by mergin the correspondences from a number of mappings"@en ;