amalgame/commit
added numeric difference partitioner
author | Jacco van Ossenbruggen |
---|---|
Tue Mar 3 18:45:12 2015 +0100 | |
committer | Jacco van Ossenbruggen |
Tue Mar 3 18:45:12 2015 +0100 | |
commit | 5fc88e3a70ee8ace62fcb9dc1ae5c9d1a74324c0 |
tree | 3a39c2300fdd2efb0cf815ce7fbd66812477a1b8 |
parent | 382fa5e1ae118c28bbbc9c115a08336a841b6baa |
Diff style: patch stat
diff --git a/config-available/ag_modules.pl b/config-available/ag_modules.pl index debcd5b..438d712 100644 --- a/config-available/ag_modules.pl +++ b/config-available/ag_modules.pl @@ -6,6 +6,7 @@ :- use_module(library(ag_modules/descendent_generator)). :- use_module(library(ag_modules/exact_label_generator)). :- use_module(library(ag_modules/isub_generator)). +:- use_module(library(ag_modules/numeric_difference_generator)). :- use_module(library(ag_modules/preloaded_mapping)). :- use_module(library(ag_modules/related_generator)). :- use_module(library(ag_modules/snowball_label_generator)). @@ -16,6 +17,7 @@ :- use_module(library(ag_modules/descendent_selecter)). :- use_module(library(ag_modules/exact_label_selecter)). :- use_module(library(ag_modules/isub_selecter)). +:- use_module(library(ag_modules/numeric_difference_selecter)). :- use_module(library(ag_modules/preloaded_selecter)). :- use_module(library(ag_modules/related_selecter)). :- use_module(library(ag_modules/snowball_label_selecter)). diff --git a/lib/ag_modules/numeric_difference_generator.pl b/lib/ag_modules/numeric_difference_generator.pl new file mode 100644 index 0000000..316e541 --- /dev/null +++ b/lib/ag_modules/numeric_difference_generator.pl @@ -0,0 +1,41 @@ +:- module(numeric_difference_generator, + []). + +:- use_module(library(semweb/rdf_db)). +:- use_module(library(amalgame/vocabulary)). +:- use_module(library(amalgame/rdf_util)). + +:- use_module(numeric_difference_match). + + +:- public matcher/4. +:- public parameter/4. +:- public amalgame_module/1. + +amalgame_module(amalgame:'NumericDifferenceMatcher'). + +parameter(sourcelabel, oneof(LiteralProps), Default, + '(Super)Property to get label of the source by') :- + rdf_equal(Default, rdfs:label), + rdf_literal_predicates(LiteralProps). +parameter(targetlabel, oneof(LiteralProps), Default, + '(Super)Property to get the label of the target by') :- + rdf_equal(Default, rdfs:label), + rdf_literal_predicates(LiteralProps). 
+ +parameter(threshold, float, 0.05, + 'threshold absolute difference'). + +%% matcher(+Source, +Target, -Mappings, +Options) +% +% Mappings is a list of matches between instances of Source and +% Target. + +matcher(Source, Target, Mappings, Options) :- + findall(M, align(Source, Target, M, Options), Mappings0), + sort(Mappings0, Mappings). + +align(Source, Target, Match, Options) :- + vocab_member(S, Source), + vocab_member(T, Target), + numeric_difference_match(align(S,T,[]), Match, Options). diff --git a/lib/ag_modules/numeric_difference_match.pl b/lib/ag_modules/numeric_difference_match.pl new file mode 100644 index 0000000..919ad1b --- /dev/null +++ b/lib/ag_modules/numeric_difference_match.pl @@ -0,0 +1,34 @@ +:- module(numeric_difference_match, + [numeric_difference_match/3]). + +:- use_module(library(option)). +:- use_module(library(semweb/rdf_db)). +:- use_module(library(semweb/rdf_label)). + +numeric_difference_match(align(Source, Target, Prov0), + align(Source, Target, [Prov|Prov0]), Options) :- + rdf_equal(skos:definition, DefaultProp), + option(threshold(Threshold), Options, 0.05), + option(sourcelabel(MatchProp1), Options, DefaultProp), + option(targetlabel(MatchProp2), Options, DefaultProp), + + SearchTarget=literal(TargetLit), + + ( rdf_has(Source, MatchProp1, literal(SourceLit), SourceProp), + rdf_has(Target, MatchProp2, SearchTarget, TargetProp), + Source \== Target + -> literal_text(SourceLit, SourceTxt), + atom_number(SourceTxt, SourceNumber), + literal_text(TargetLit, TargetTxt), + atom_number(TargetTxt, TargetNumber), + Difference is abs(SourceNumber-TargetNumber) + ; Difference = -1 + ), + Difference >= 0, + Difference =< Threshold, + Score is 0-Difference, + Prov = [method(numeric_similarity), + match(Score), + graph([rdf(Source, SourceProp, SourceLit), + rdf(Target, TargetProp, TargetLit)]) + ]. 
diff --git a/lib/ag_modules/numeric_difference_selecter.pl b/lib/ag_modules/numeric_difference_selecter.pl new file mode 100644 index 0000000..d056205 --- /dev/null +++ b/lib/ag_modules/numeric_difference_selecter.pl @@ -0,0 +1,34 @@ +:- module(numeric_similarity_selecter, + []). + +:- public amalgame_module/1. +:- public selecter/5. +:- public parameter/4. + +:- use_module(library(semweb/rdf_db)). + +:- use_module(library(amalgame/rdf_util)). +:- use_module(numeric_difference_match). +:- use_module(label_selecter). + +parameter(type, + oneof([source,target, all]), all, + 'Select all matches or pick best source/target to disambiguate'). + +parameter(threshold, float, 0.05, + 'threshold absolute difference'). + +parameter(sourcelabel, oneof(LiteralProps), Default, + '(Super)Property to get label of the source by') :- + rdf_equal(Default, rdfs:label), + rdf_literal_predicates(LiteralProps). + +parameter(targetlabel, oneof(LiteralProps), Default, + '(Super)Property to get the label of the target by') :- + rdf_equal(Default, rdfs:label), + rdf_literal_predicates(LiteralProps). + +amalgame_module(amalgame:'NumericDifferenceSelecter'). + +selecter(In, Sel, Dis, Und, Options) :- + label_selecter(numeric_difference_match, In, Sel, Dis, Und, Options). diff --git a/rdf/tool/ag_modules.ttl b/rdf/tool/ag_modules.ttl index 06b38ab..aaa6b6e 100644 --- a/rdf/tool/ag_modules.ttl +++ b/rdf/tool/ag_modules.ttl @@ -32,6 +32,11 @@ amalgame:IsubMatcher skos:definition "Generate new candidates based on similar labels. The matcher is based on the 'isub' metric introduced in 'A string metric for ontology alignment' by Giorgos Stoilos, 2005."@en ; rdfs:subClassOf amalgame:CandidateGenerator . +amalgame:NumericDifferenceMatcher + rdfs:label "numeric/difference"@en ; + skos:definition "Generate new candidates based on similar numeric literals. 
The score is the absolute difference of the two numeric literals compared, or infinite if these are not numbers."@en ; + rdfs:subClassOf amalgame:CandidateGenerator . + amalgame:SelectPreLoaded rdfs:label "import/preloaded"@en ; skos:definition "Match mappings from preloaded named graph against concepts in source and target vocabularies"@en ; @@ -77,6 +82,10 @@ amalgame:IsubSelecter skos:definition "Select mappings with the most similar labels, discard others for the same source/target. If type=all, all candidates with sufficiently similar labels are selected. The matcher is based on the 'isub' metric introduced in 'A string metric for ontology alignment' by Giorgos Stoilos, 2005."@en ; rdfs:subClassOf amalgame:MappingPartitioner . +amalgame:NumericDifferenceSelecter + rdfs:label "numeric/difference"@en ; + skos:definition "Select mappings with similar numeric literals. If type=all, all candidates with sufficiently similar numbers are selected. The score is the absolute difference of the two numeric literals compared, or infinite if these are not numbers."@en ; + rdfs:subClassOf amalgame:MappingPartitioner . amalgame:SelectPreLoadedSelecter rdfs:label "import/preloaded"@en ; skos:definition "Select mappings with corresponding mappings in the preloaded mapping, discard others with the same source/target."@en ; @@ -108,7 +117,7 @@ amalgame:Sampler rdfs:subClassOf amalgame:MappingPartitioner . amalgame:BestNumeric - rdfs:label "best numeric"@en ; + rdfs:label "best numeric score"@en ; skos:definition "Select correspondences that have the best score considering some numerical ranking, discard others."@en ; rdfs:subClassOf amalgame:MappingPartitioner .