amalgame/commit

Added numeric difference partitioner

author Jacco van Ossenbruggen
Tue Mar 3 18:45:12 2015 +0100
committer Jacco van Ossenbruggen
Tue Mar 3 18:45:12 2015 +0100
commit 5fc88e3a70ee8ace62fcb9dc1ae5c9d1a74324c0
tree 3a39c2300fdd2efb0cf815ce7fbd66812477a1b8
parent 382fa5e1ae118c28bbbc9c115a08336a841b6baa
Diff style: patch stat
diff --git a/config-available/ag_modules.pl b/config-available/ag_modules.pl
index debcd5b..438d712 100644
--- a/config-available/ag_modules.pl
+++ b/config-available/ag_modules.pl
@@ -6,6 +6,7 @@
 :- use_module(library(ag_modules/descendent_generator)).
 :- use_module(library(ag_modules/exact_label_generator)).
 :- use_module(library(ag_modules/isub_generator)).
+:- use_module(library(ag_modules/numeric_difference_generator)).
 :- use_module(library(ag_modules/preloaded_mapping)).
 :- use_module(library(ag_modules/related_generator)).
 :- use_module(library(ag_modules/snowball_label_generator)).
@@ -16,6 +17,7 @@
 :- use_module(library(ag_modules/descendent_selecter)).
 :- use_module(library(ag_modules/exact_label_selecter)).
 :- use_module(library(ag_modules/isub_selecter)).
+:- use_module(library(ag_modules/numeric_difference_selecter)).
 :- use_module(library(ag_modules/preloaded_selecter)).
 :- use_module(library(ag_modules/related_selecter)).
 :- use_module(library(ag_modules/snowball_label_selecter)).
diff --git a/lib/ag_modules/numeric_difference_generator.pl b/lib/ag_modules/numeric_difference_generator.pl
new file mode 100644
index 0000000..316e541
--- /dev/null
+++ b/lib/ag_modules/numeric_difference_generator.pl
@@ -0,0 +1,70 @@
+:- module(numeric_difference_generator,
+	  []).
+
+:- use_module(library(semweb/rdf_db)).
+:- use_module(library(amalgame/vocabulary)).
+:- use_module(library(amalgame/rdf_util)).
+
+:- use_module(numeric_difference_match).
+
+% Candidate generator built on numeric_difference_match/3.  The module
+% exports nothing; the predicates below are declared public, presumably
+% so the amalgame framework can call them module-qualified (confirm
+% against the framework).
+
+:- public amalgame_module/1.
+:- public matcher/4.
+:- public parameter/4.
+
+%%	amalgame_module(?Class)
+%
+%	Class is the amalgame class implemented by this module.
+
+amalgame_module(amalgame:'NumericDifferenceMatcher').
+
+%%	parameter(?Name, ?Type, ?Default, ?Description)
+%
+%	Declares the options of this matcher: the properties the source
+%	and target literals are read from and the largest absolute
+%	difference that still counts as a match.
+
+parameter(sourcelabel, oneof(Properties), Default,
+	  '(Super)Property to get label of the source by') :-
+	label_property_choice(Default, Properties).
+parameter(targetlabel, oneof(Properties), Default,
+	  '(Super)Property to get the label of the target by') :-
+	label_property_choice(Default, Properties).
+parameter(threshold, float, 0.05,
+	  'threshold absolute difference').
+
+%	label_property_choice(-Default, -Properties)
+%
+%	Default is rdfs:label and Properties is whatever
+%	rdf_literal_predicates/1 yields.
+
+label_property_choice(Default, Properties) :-
+	rdf_equal(Default, rdfs:label),
+	rdf_literal_predicates(Properties).
+
+%%	matcher(+Source, +Target, -Mappings, +Options)
+%
+%	Mappings is the sorted, duplicate-free list of matches between
+%	instances of Source and Target.
+
+matcher(Source, Target, Mappings, Options) :-
+	findall(Match,
+		align(Source, Target, Match, Options),
+		Unsorted),
+	sort(Unsorted, Mappings).
+
+%	align(+Source, +Target, -Match, +Options)
+%
+%	Enumerates on backtracking every Match produced by
+%	numeric_difference_match/3 for a member of Source paired with a
+%	member of Target.
+
+align(Source, Target, Match, Options) :-
+	vocab_member(SourceConcept, Source),
+	vocab_member(TargetConcept, Target),
+	Candidate = align(SourceConcept, TargetConcept, []),
+	numeric_difference_match(Candidate, Match, Options).
diff --git a/lib/ag_modules/numeric_difference_match.pl b/lib/ag_modules/numeric_difference_match.pl
new file mode 100644
index 0000000..919ad1b
--- /dev/null
+++ b/lib/ag_modules/numeric_difference_match.pl
@@ -0,0 +1,64 @@
+:- module(numeric_difference_match,
+	  [numeric_difference_match/3]).
+
+:- use_module(library(option)).
+:- use_module(library(semweb/rdf_db)).
+:- use_module(library(semweb/rdf_label)).
+
+%%	numeric_difference_match(+In, -Out, +Options) is semidet.
+%
+%	In is align(Source, Target, Prov0).  Succeeds if Source and Target
+%	are different resources that each have a literal whose text is a
+%	number and the absolute difference of the two numbers is at most
+%	the threshold.  Out is In with the provenance of this match
+%	prepended to Prov0.
+%
+%	Literal pairs are tried in the order rdf_has/4 yields them and the
+%	first pair that passes every test is used.  A non-numeric or
+%	out-of-range literal therefore does not hide a later literal that
+%	does match.
+%
+%	Options:
+%
+%	  * threshold(+Number)
+%	  Largest accepted absolute difference (default 0.05).
+%	  * sourcelabel(+Property)
+%	  Property whose literals are read from Source (default
+%	  rdfs:label, matching the declared parameter default).
+%	  * targetlabel(+Property)
+%	  Property whose literals are read from Target (default
+%	  rdfs:label, matching the declared parameter default).
+
+numeric_difference_match(align(Source, Target, Prov0),
+			 align(Source, Target, [Prov|Prov0]), Options) :-
+	rdf_equal(rdfs:label, DefaultProp),
+	option(threshold(Threshold), Options, 0.05),
+	option(sourcelabel(MatchProp1), Options, DefaultProp),
+	option(targetlabel(MatchProp2), Options, DefaultProp),
+	% once/1 keeps the predicate semidet while still backtracking over
+	% literal pairs until one passes every test.
+	once(( rdf_has(Source, MatchProp1, literal(SourceLit), SourceProp),
+	       literal_number(SourceLit, SourceNumber),
+	       rdf_has(Target, MatchProp2, literal(TargetLit), TargetProp),
+	       Source \== Target,
+	       literal_number(TargetLit, TargetNumber),
+	       Difference is abs(SourceNumber-TargetNumber),
+	       Difference =< Threshold
+	     )),
+	% The score is the negated difference: a smaller difference gives
+	% a larger score.
+	Score is 0-Difference,
+	Prov = [method(numeric_similarity),
+		match(Score),
+		graph([rdf(Source, SourceProp, SourceLit),
+		       rdf(Target, TargetProp, TargetLit)])
+	       ].
+
+%	literal_number(+Literal, -Number) is semidet.
+%
+%	Number is the number written in the text of Literal.  Fails if
+%	that text is not a number.
+
+literal_number(Literal, Number) :-
+	literal_text(Literal, Text),
+	atom_number(Text, Number).
diff --git a/lib/ag_modules/numeric_difference_selecter.pl b/lib/ag_modules/numeric_difference_selecter.pl
new file mode 100644
index 0000000..d056205
--- /dev/null
+++ b/lib/ag_modules/numeric_difference_selecter.pl
@@ -0,0 +1,54 @@
+:- module(numeric_difference_selecter,
+	  []).
+
+% Mapping partitioner built on numeric_difference_match/3.  The module
+% is named after its file, like the sibling generator module.
+
+:- public amalgame_module/1.
+:- public selecter/5.
+:- public parameter/4.
+
+:- use_module(library(semweb/rdf_db)).
+
+:- use_module(library(amalgame/rdf_util)).
+:- use_module(numeric_difference_match).
+:- use_module(label_selecter).
+
+%%	parameter(?Name, ?Type, ?Default, ?Description)
+%
+%	Declares the options of this selecter: which matches to keep, the
+%	largest accepted absolute difference, and the properties the
+%	source and target literals are read from.
+
+parameter(type,
+	  oneof([source,target, all]), all,
+	 'Select all matches or pick best source/target to disambiguate').
+
+parameter(threshold, float, 0.05,
+	  'threshold absolute difference').
+
+parameter(sourcelabel, oneof(LiteralProps), Default,
+	  '(Super)Property to get label of the source by') :-
+	rdf_equal(Default, rdfs:label),
+	rdf_literal_predicates(LiteralProps).
+
+parameter(targetlabel, oneof(LiteralProps), Default,
+	  '(Super)Property to get the label of the target by') :-
+	rdf_equal(Default, rdfs:label),
+	rdf_literal_predicates(LiteralProps).
+
+%%	amalgame_module(?Class)
+%
+%	Class is the amalgame class implemented by this module.
+
+amalgame_module(amalgame:'NumericDifferenceSelecter').
+
+%%	selecter(In, Sel, Dis, Und, Options)
+%
+%	Delegates to label_selecter/6, passing numeric_difference_match as
+%	the matcher.  NOTE(review): Sel, Dis and Und are presumably the
+%	selected, discarded and undecided mappings; confirm against
+%	label_selecter.
+
+selecter(In, Sel, Dis, Und, Options) :-
+	label_selecter(numeric_difference_match, In, Sel, Dis, Und, Options).
diff --git a/rdf/tool/ag_modules.ttl b/rdf/tool/ag_modules.ttl
index 06b38ab..aaa6b6e 100644
--- a/rdf/tool/ag_modules.ttl
+++ b/rdf/tool/ag_modules.ttl
@@ -32,6 +32,11 @@ amalgame:IsubMatcher
     skos:definition "Generate new candidates based on similar labels. The matcher is based on the 'isub' metric introduced in 'A string metric for ontology alignment' by Giorgos Stoilos, 2005."@en ;
     rdfs:subClassOf amalgame:CandidateGenerator .
 
+amalgame:NumericDifferenceMatcher
+    rdfs:label "numeric/difference"@en ;
+    skos:definition "Generate new candidates based on similar numeric literals.	The score is the absolute difference of the two numeric literals compared, or infinite if these are not numbers."@en ;
+    rdfs:subClassOf amalgame:CandidateGenerator .
+
 amalgame:SelectPreLoaded
     rdfs:label "import/preloaded"@en ;
     skos:definition "Match mappings from preloaded named graph against concepts in source and target vocabularies"@en ;
@@ -77,6 +82,10 @@ amalgame:IsubSelecter
     skos:definition "Select mappings with the most similar labels, discard others for the same source/target.  If type=all, all candidates with sufficiently similar labels are selected. The matcher is based on the 'isub' metric introduced in 'A string metric for ontology alignment' by Giorgos Stoilos, 2005."@en ;
     rdfs:subClassOf amalgame:MappingPartitioner .
 
+amalgame:NumericDifferenceSelecter
+    rdfs:label "numeric/difference"@en ;
+    skos:definition "Select mappings with similar numeric literals. If type=all, all candidates with sufficiently similar numbers are selected. The score is the absolute difference of the two numeric literals compared, or infinite if these are not numbers."@en ;
+    rdfs:subClassOf amalgame:MappingPartitioner .
 amalgame:SelectPreLoadedSelecter
     rdfs:label "import/preloaded"@en ;
     skos:definition "Select mappings with corresponding mappings in the preloaded mapping, discard others with the same source/target."@en ;
@@ -108,7 +117,7 @@ amalgame:Sampler
     rdfs:subClassOf amalgame:MappingPartitioner .
 
 amalgame:BestNumeric
-    rdfs:label "best numeric"@en ;
+    rdfs:label "best numeric score"@en ;
     skos:definition "Select correspondences that have the best score considering some numerical ranking, discard others."@en ;
     rdfs:subClassOf amalgame:MappingPartitioner .