tag_matcher/commit

API to semantically match two tags using the relations within one or more SKOS vocabularies

authorMichiel Hildebrand
Sat Jan 22 18:12:52 2011 +0100
committerMichiel Hildebrand
Sat Jan 22 18:12:52 2011 +0100
commit2dbeeb2fcc7ad73722c71cb9ae6e7591f18a66d8
treee001af8c4f8cf455d95512c094728324ca4a93d4
parentd3b1b5386b2e8ebfe77c1215929a616ab081e9f7
Diff style: patch stat
diff --git a/api/tag_matcher.pl b/api/tag_matcher.pl
new file mode 100644
index 0000000..95065a9
--- /dev/null
+++ b/api/tag_matcher.pl
@@ -0,0 +1,223 @@
+:- module(tag_matcher,
+	  [ find_tag_concept/3
+	  ]).
+
+:- use_module(library(http/http_dispatch)).
+:- use_module(library(http/http_parameters)).
+:- use_module(library(http/html_write)).
+:- use_module(library(http/html_head)).
+:- use_module(library(http/http_json)).
+:- use_module(library(http/json)).
+:- use_module(library(http/json_convert)).
+:- use_module(library(semweb/rdf_db)).
+:- use_module(library(semweb/rdfs)).
+:- use_module(library(semweb/rdf_litindex)).
+:- use_module(library(find_resource)).
+:- use_module(components(label)).
+
+/** <module> Tag matcher
+
+This module provides a web service for fuzzy matching of two strings
+(e.g. tags).
+
+@author Michiel Hildebrand
+*/
+
+:- http_handler(root(tagmatch), http_tag_match_page, []).
+:- http_handler(root(api/tagmatch), http_tag_match_api, []).
+
+:- rdf_register_ns(gtaa, 'http://data.beeldengeluid.nl/gtaa/').
+:- rdf_register_ns(cornetto, 'http://purl.org/vocabularies/cornetto/').
+
+:- rdf_meta
+        scheme_type(r,-).
+
+%%	http_tag_match_page(+Request)
+%
+%	HTTP handler to create a simple web page with a form to enter
+%	and match two tags.
+
+http_tag_match_page(Request) :-
+	http_parameters(Request,
+			[ t1(Tag1,
+			     [optional(true), description('a tag')]),
+			  t2(Tag2,
+			     [optional(true), description('another tag')])
+			]),
+	(   nonvar(Tag1),
+	    nonvar(Tag2)
+	->  tag_match(Tag1, Tag2, T1Data, T2Data, Type)
+	;   true
+	),
+	reply_html_page(cliopatria(default),
+			[title('Tag matcher'),
+			 style(['h2 { border-bottom: 1px solid #CCC;}',
+ 				'.tag-input-line,
+			         .tag-data-line
+				     { margin-bottom: 5px; } ',
+ 				'table {border: 1px solid #CCC; }',
+				'td { padding: 2px 10px; }'
+			       ])
+			],
+			[h2('tag matcher'),
+ 			 div(class('tag-input'),
+			     form(action(location_by_id(http_tag_match_page)),
+				  [ \html_tag_input(1, Tag1),
+				    \html_tag_input(2, Tag2),
+				    input([type(submit), value(match)]),
+				    ' examples: ',
+				    \html_match_examples
+				  ])),
+			 div(class('tag-match'),
+			     \html_tag_match(Type)),
+			 div(class('tag-data'),
+			     [ \html_tag_data(1, T1Data),
+			       \html_tag_data(2, T2Data)
+			     ])
+			]).
+
+html_tag_input(N, Tag) -->
+	{ (   var(Tag)
+	  ->  Txt = ''
+	  ;   Txt = Tag
+	  )
+	},
+	html(div(class('tag-input-line'),
+		[ label(['tag ', N, ': ']),
+		  input([type(text), name(t+N), value(Txt)])
+		])).
+
+html_tag_data(_, TagData) -->
+	{ var(TagData) ; TagData = [] },
+	!.
+html_tag_data(N, TagData) -->
+	html(div(class('tag-data-line'),
+		 [ label(['tag ', N]),
+		   table(\tag_data_rows(TagData))
+		 ])).
+
+tag_data_rows([]) --> !.
+tag_data_rows([K=V|T]) -->
+	html(tr([td(\data_cell(K)), td(\data_cell(V))])),
+ 	tag_data_rows(T).
+data_cell(R) -->
+	{ atom(R),
+	  rdf_subject(R)
+	},
+	!,
+	rdf_link(R).
+data_cell(R) -->
+	html(R).
+
+html_tag_match(Type) -->
+	{ var(Type) },
+	!.
+html_tag_match(@false) -->
+	html(h3('no match found')).
+html_tag_match(Type) -->
+	html(h3(['match: ', Type])).
+
+html_match_examples -->
+	html([\example_link(exact, 'obama', 'obama'),
+	      ' | ',
+	      \example_link(synonym, 'auto', 'wagen'),
+	      ' | ',
+	      \example_link(specific, labrador, hond),
+	      ' | ',
+	      \example_link(generic, paal, amsterdammertje)
+	     ]).
+
+example_link(Name, Tag1, Tag2) -->
+	{ http_location_by_id(http_tag_match_page, Link)
+	},
+	html(a(href(Link+'?t1='+Tag1+'&t2='+Tag2), Name)).
+
+
+%%	http_tag_match_api(+Request)
+%
+%	HTTP handler to determine the match between two tags.
+
+http_tag_match_api(Request) :-
+	http_parameters(Request,
+			[ t1(Tag1, [description('a tag')]),
+			  t2(Tag2, [description('another tag')])
+			]),
+ 	tag_match(Tag1, Tag2, T1Data, T2Data, Type),
+	reply_json(json([t1=json([tag=Tag1|T1Data]),
+			 t2=json([tag=Tag2|T2Data]),
+			 match=Type
+			])).
+
+tag_match(T, T, TData, _, exact) :-	!,
+	(   find_tag_concept(T, _R, Hit)
+	->  tag_data(Hit, TData)
+	;   TData = []
+ 	).
+tag_match(T1, T2, TData, _, synonym) :-
+	find_tag_concept(T1, R, Hit),
+	find_tag_concept(T2, R, _Hit),
+	!,
+	tag_data(Hit, TData).
+tag_match(T1, T2, T1Data, T2Data, specific) :-
+	find_tag_concept(T1, R1, Hit1),
+	find_tag_concept(T2, R2, Hit2),
+	ancestor_of(R1, R2),
+	!,
+	tag_data(Hit1, T1Data),
+	tag_data(Hit2, T2Data).
+tag_match(T1, T2, T1Data, T2Data, generic) :-
+	find_tag_concept(T1, R1, Hit1),
+	find_tag_concept(T2, R2, Hit2),
+	ancestor_of(R2, R1),
+	!,
+	tag_data(Hit1, T1Data),
+	tag_data(Hit2, T2Data).
+tag_match(_, _, [], [], @false).
+
+tag_data(hit(D,R,P,L), Data) :-
+	Data = [uri=R, match=L, matchtype=P, distance=D, type=Type],
+	tag_concept_type(R, Type).
+
+
+%%	find_tag_concept(?String, ?Concept)
+%
+%	String is a label of Concept.
+
+find_tag_concept(Tag, R, Hit) :-
+	rdf_equal(rdfs:label, Label),
+	rdf_equal(skos:prefLabel, PrefLabel),
+	rdf_equal(cornetto:synonym, Syn),
+	LabelList = [Syn-0,
+		     PrefLabel-0,
+		     Label-1
+		    ],
+	Options = [match(case),
+		   distance(true),
+		   attributes(LabelList)
+		  ],
+	find_resource_by_name(Tag, Hits, Options),
+	member(Hit, Hits),
+	Hit = hit(_,R,_,_).
+
+
+%%	tag_concept_type(+Concept, -Type)
+%
+%	Type is one of predefined tag types of Concept.
+
+tag_concept_type(R, Type) :-
+	rdf(R, skos:inScheme, Scheme),
+	scheme_type(Scheme, Type),
+	!.
+tag_concept_type(_, unknown).
+
+scheme_type(gtaa:'Maker', person) :- !.
+scheme_type(gtaa:'Namen', person):- !.
+scheme_type(gtaa:'Persoonsnamen', person) :- !.
+scheme_type(gtaa:'GeografischeNamen', location) :- !.
+
+%%	ancestor_of(+R, +Ancestor)
+%
+%	True if Ancestor is reachable by skos:broader from R.
+
+ancestor_of(R, A) :-
+	rdf_reachable(R, skos:broader, A).
diff --git a/config-available/tag_matcher.pl b/config-available/tag_matcher.pl
index 94bc0cc..80fc3c5 100644
--- a/config-available/tag_matcher.pl
+++ b/config-available/tag_matcher.pl
@@ -3,3 +3,4 @@
 /** <module> Web service to semantically match two tags
 */
 
+:- use_module(api(tag_matcher)).
diff --git a/rdf/cpack/tag_matcher.ttl b/rdf/cpack/tag_matcher.ttl
index 4280f98..b88cc6d 100644
--- a/rdf/cpack/tag_matcher.ttl
+++ b/rdf/cpack/tag_matcher.ttl
@@ -14,10 +14,7 @@
 <> a cpack:Application ;
 	cpack:packageName "tag_matcher" ;
 	dcterms:title "Web service to semantically match two tags" ;
-	cpack:author [ a foaf:Person ;
-		       foaf:name "@FOAFNAME@" ;
-		       foaf:mbox "@FOAFMBOX@" ;
-		     ] ;
+	cpack:author <http://www.few.vu.nl/~michielh/me>  ;
 	cpack:primaryRepository
 	    [ a cpack:GitRepository ;
 	      cpack:gitURL <git://semanticweb.cs.vu.nl/home/hildebra/git/ClioPatria/tag_matcher.git>