amalgame/commit
REFACTORING: compound label matching no follows the new generator/partition convention
author | Jacco van Ossenbruggen |
---|---|
Mon Sep 8 09:34:40 2014 +0200 | |
committer | Jacco van Ossenbruggen |
Mon Sep 8 09:34:40 2014 +0200 | |
commit | 15df88d9521ad54868de5e55bde478c3a7dc359e |
tree | 84a037dc4db85f0cd033a770f8c91c6332f7dacf |
parent | 9047ef14a86eab9d5ccd06443570531af33876ec |
Diff style: patch stat
diff --git a/config-available/ag_modules.pl b/config-available/ag_modules.pl index 7558085..2e0fb0b 100644 --- a/config-available/ag_modules.pl +++ b/config-available/ag_modules.pl @@ -2,6 +2,7 @@ % Candidate correspondence generator components: :- use_module(library(ag_modules/ancestor_generator)). +:- use_module(library(ag_modules/compound_label_generator)). :- use_module(library(ag_modules/descendent_generator)). :- use_module(library(ag_modules/exact_label_generator)). :- use_module(library(ag_modules/isub_generator)). @@ -9,6 +10,7 @@ % Mapping producing partitioners: :- use_module(library(ag_modules/ancestor_selecter)). +:- use_module(library(ag_modules/compound_label_selecter)). :- use_module(library(ag_modules/descendent_selecter)). :- use_module(library(ag_modules/exact_label_selecter)). :- use_module(library(ag_modules/isub_selecter)). @@ -21,7 +23,6 @@ :- use_module(library(ag_modules/propvalue_select)). % Modules that can be used as matchers -:- use_module(library(ag_modules/compound_match)). :- use_module(library(ag_modules/snowball_match)). :- use_module(library(ag_modules/preloaded_mapping)). diff --git a/lib/ag_modules/compound_label_generator.pl b/lib/ag_modules/compound_label_generator.pl new file mode 100644 index 0000000..69672cf --- /dev/null +++ b/lib/ag_modules/compound_label_generator.pl @@ -0,0 +1,50 @@ +:- module(compound_label_generator, + []). + +:- use_module(library(lists)). +:- use_module(library(semweb/rdf_db)). +:- use_module(library(amalgame/vocabulary)). +:- use_module(string_match_util). +:- use_module(compound_label_match). + +:- public amalgame_module/1. +:- public matcher/4. +:- public parameter/4. + +amalgame_module(amalgame:'CompoundMatcher'). + +parameter(sourcelabel, oneof(LabelProps), Default, + '(Super)Property to get label of the source by') :- + rdf_equal(Default, rdfs:label), + label_list(LabelProps). +parameter(targetlabel, oneof(LabelProps), Default, + '(Super)Property to get the label of the target by') :- + rdf_equal(Default, rdfs:label), + label_list(LabelProps). +parameter(source_language, oneof(['any'|L]), 'any', 'Language of source label') :- + strategy_languages(_,L). +parameter(matchacross_lang, boolean, true, + 'Allow labels from different language to be matched'). +parameter(matchacross_type, boolean, true, + 'Allow labels from different types to be matched'). +parameter(case_sensitive, boolean, false, + 'When true the case of labels must be equal'). +parameter(match_qualified_only, boolean, false, + 'Match only on the fully qualified label'). + +%% matcher(+Source, +Target, -Mappings, +Options) +% +% Mappings is a sorted list of matches between instances of Source +% and Target. + +matcher(Source, Target, Mappings, Options) :- + findall(M, align(Source, Target, M, Options), ListOfLists), + append(ListOfLists, MappingsUnSorted), + sort(MappingsUnSorted, Mappings). + +align(Source, TargetScheme, MatchList, Options) :- + vocab_member(S, Source), + compound_label_match(align(S,_,[]), MatchList, + [target_scheme(TargetScheme)|Options]). + + diff --git a/lib/ag_modules/compound_match.pl b/lib/ag_modules/compound_label_match.pl similarity index 52% rename from lib/ag_modules/compound_match.pl rename to lib/ag_modules/compound_label_match.pl index a29663b..4406f76 100644 --- a/lib/ag_modules/compound_match.pl +++ b/lib/ag_modules/compound_label_match.pl @@ -1,71 +1,17 @@ -:- module(compound_match, - []). +:- module(compound_label_match, + [ compound_label_match/3 ]). +:- use_module(library(lists)). +:- use_module(library(option)). :- use_module(library(semweb/rdf_db)). :- use_module(library(semweb/rdf_litindex)). :- use_module(library(amalgame/vocabulary)). :- use_module(string_match_util). -:- public amalgame_module/1. -:- public filter/3. -:- public matcher/4. -:- public parameter/4. - -amalgame_module(amalgame:'CompoundMatcher'). - -parameter(sourcelabel, oneof(LabelProps), Default, - '(Super)Property to get label of the source by') :- - rdf_equal(Default, rdfs:label), - label_list(LabelProps). -parameter(targetlabel, oneof(LabelProps), Default, - '(Super)Property to get the label of the target by') :- - rdf_equal(Default, rdfs:label), - label_list(LabelProps). -parameter(language, oneof(['any'|L]), 'any', 'Language of source label') :- - strategy_languages(_,L). -parameter(matchacross_lang, boolean, true, - 'Allow labels from different language to be matched'). -parameter(matchacross_type, boolean, true, - 'Allow labels from different types to be matched'). -parameter(case_sensitive, boolean, false, - 'When true the case of labels must be equal'). - -%% filter(+MappingsIn, -MappingsOut, +Options) -% -% Filter mappings based on exact matching of labels. - -filter([], [], _). -filter([align(S,T,P)|Cs], [C|Mappings], Options) :- - ( T = scheme(_) - -> match(align(S,_,P), C, Options), - C=align(_,T2,_), - vocab_member(T2, T) - ; match(align(S,T,P), C, Options) - ), - !, - filter(Cs, Mappings, Options). -filter([_|Cs], Mappings, Options) :- - filter(Cs, Mappings, Options). - - -%% matcher(+Source, +Target, -Mappings, +Options) -% -% Mappings is a sorted list of matches between instances of Source -% and Target. - -matcher(Source, Target, Mappings, Options) :- - findall(M, align(Source, Target, M, Options), Mappings0), - flatten(Mappings0, MappingsFlat), - sort(MappingsFlat, Mappings). - -align(Source, TargetScheme, Match, Options) :- - vocab_member(S, Source), - match(align(S,TargetScheme,[]), Match, Options). - -match(align(Source, TargetScheme, Prov0), Results, Options) :- +compound_label_match(align(Source, _Target, Prov0), Results, Options) :- rdf_equal(rdfs:label, RdfsLabel), option(sourcelabel(MatchProp1), Options, RdfsLabel), - option(language(Lang), Options, 'any'), + option(source_language(Lang), Options, 'any'), ( Lang == 'any' -> SourceLang = _ @@ -75,16 +21,18 @@ match(align(Source, TargetScheme, Prov0), Results, Options) :- Prov = [method(compound_label_match), score(Score), match(Match), - graph([rdf(Source, SourceProp, literal(lang(SourceLang, SourceLabel)))]) + graph([rdf(Source, SourceProp, + literal(lang(SourceLang, SourceLabel)))]) ], - skos_match(Source, MatchProp1, literal(lang(SourceLang, SourceLabel)), SourceProp, Options), + skos_match(Source, MatchProp1, + literal(lang(SourceLang, SourceLabel)), SourceProp, Options), rdf_tokenize_literal(SourceLabel, Tokens), findall(Targets-Token-LabelAmbScore, ( member(Token, Tokens), atom(Token), match_label(Source, Token, Targets, - [scheme(TargetScheme), sourcelang(SourceLang)|Options]), + [source_lang(SourceLang)|Options]), length(Targets, LabelAmbScore) ), @@ -118,8 +66,7 @@ match_label(Source, Label, Targets, Options) :- option(matchacross_lang(MatchAcross), Options, true), option(matchacross_type(IgnoreType), Options, true), option(case_sensitive(CaseSensitive), Options, false), - option(sourcelang(SourceLang), Options), - option(scheme(TargetScheme), Options), + option(source_lang(SourceLang), Options), ( CaseSensitive -> SearchTarget=literal(lang(TargetLang, Label)) @@ -133,9 +80,12 @@ match_label(Source, Label, Targets, Options) :- ), findall(Target-ProvGraph, - ( skos_match(Target, MatchProp, SearchTarget, TargetProp, Options), - Source \== Target, % fix me, replace by target vocab check as in exact_label example - vocab_member(Target, TargetScheme), + ( skos_match(Target, MatchProp, SearchTarget, + TargetProp, Options), + ( option(target_scheme(TargetScheme), Options) + -> vocab_member(Target, TargetScheme) + ; true + ), ( IgnoreType -> true ; matching_types(Source, Target) @@ -147,5 +97,4 @@ match_label(Source, Label, Targets, Options) :- ProvGraph = [rdf(Target, TargetProp, TargetTerm)] ), Targets), - (ground(Targets) -> true; gtrace), Targets \= []. diff --git a/lib/ag_modules/compound_label_selecter.pl b/lib/ag_modules/compound_label_selecter.pl new file mode 100644 index 0000000..1294731 --- /dev/null +++ b/lib/ag_modules/compound_label_selecter.pl @@ -0,0 +1,40 @@ +:- module(compound_label_selecter, + []). + +:- public amalgame_module/1. +:- public selecter/5. +:- public parameter/4. + +:- use_module(label_selecter). +:- use_module(compound_label_match). +:- use_module(string_match_util). + +parameter(type, + oneof([source,target, all]), all, + 'Select all exact label matches or pick best source/target to disambiguate'). + +parameter(sourcelabel, oneof(LabelProps), Default, + '(Super)Property to get label of the source by') :- + rdf_equal(Default, rdfs:label), + label_list(LabelProps). +parameter(targetlabel, oneof(LabelProps), Default, + '(Super)Property to get the label of the target by') :- + rdf_equal(Default, rdfs:label), + label_list(LabelProps). +parameter(source_language, oneof(['any'|L]), 'any', + 'Language of source label') :- + strategy_languages(_S,L). +parameter(matchacross_lang, boolean, true, + 'Allow labels from different language to be matched'). +parameter(matchacross_type, boolean, true, + 'Allow labels from different types to be matched'). +parameter(case_sensitive, boolean, false, + 'When true the case of labels must be equal'). +parameter(match_qualified_only, boolean, false, + 'Match only on the fully qualified label'). + +amalgame_module(amalgame:'CompoundLabelSelecter'). + +selecter(In, Sel, Dis, Und, Options) :- + label_selecter(compound_label_match, In, SelList, Dis, Und, Options), + append(SelList, Sel). % HACK FIX ME diff --git a/lib/ag_modules/exact_label_match.pl b/lib/ag_modules/exact_label_match.pl index 0a466f3..a64a79d 100644 --- a/lib/ag_modules/exact_label_match.pl +++ b/lib/ag_modules/exact_label_match.pl @@ -6,6 +6,15 @@ :- use_module(library(amalgame/vocabulary)). :- use_module(string_match_util). +%% exact_label_match(?In, -Out, +Options) is non det. +% +% In and Out are align(S,T,P) terms. +% Typically, there are to modes for calling this predicate: +% +% * Both S and T of In are instantiated, and the predicate is +% called to find a match between S and T, or +% * Only S is ground and the predicate is called to find a +% matching T. exact_label_match(align(Source, Target, Prov0), align(Source, Target, [Prov|Prov0]), Options) :- rdf_equal(rdfs:label, RdfsLabel), diff --git a/rdf/tool/ag_modules.ttl b/rdf/tool/ag_modules.ttl index 5149193..a4c716d 100644 --- a/rdf/tool/ag_modules.ttl +++ b/rdf/tool/ag_modules.ttl @@ -12,8 +12,8 @@ amalgame:AncestorMatcher rdfs:subClassOf amalgame:CandidateGenerator . amalgame:CompoundMatcher - rdfs:label "generate/label/compound"@en ; - skos:definition "A label matcher matching after compound splitting the label(s) of the source concepts"@en ; + rdfs:label "generate/label/compound source"@en ; + skos:definition "A label matcher matching after compound splitting the label(s) of the source concepts. Source labels are splitted using rdf_tokenize_literal/2. Warning: source label tokens are matched against complete target labels! "@en ; rdfs:subClassOf amalgame:CandidateGenerator . amalgame:DescendentMatcher @@ -46,6 +46,11 @@ amalgame:AncestorSelecter skos:definition "Select mappings with the most mapped ancestors, discard others for the same source/target. If type=all, all correspondences with one or more ancestors are selected."@en ; rdfs:subClassOf amalgame:MappingPartitioner . +amalgame:CompoundLabelSelecter + rdfs:label "partition/label/compound source"@en ; + skos:definition "Select mappings with the most matching labels, discard others for the same source/target. If type=all, all candidates with matching labels are selected. All matching is done after compound splitting the label(s) of the source concepts. Source labels are splitted using rdf_tokenize_literal/2. Warning: source label tokens are matched against complete target labels!"@en ; + rdfs:subClassOf amalgame:MappingPartitioner . + amalgame:DescendentSelecter amalgame:need_secondary_inputs true ; rdfs:label "partition/structure/descendent"@en ;