find_resource/commit

ADDED: boolean tokenize option to support non-tokenized literal search

authorJacco van Ossenbruggen
Fri Aug 22 12:03:38 2014 +0200
committerJacco van Ossenbruggen
Fri Aug 22 12:03:38 2014 +0200
commit4e9b5e394462cd1e6ca6f0cf3ff4da499c47ee18
tree3811a14e215b6948f5966b8362f06dc8ae3c7f2e
parent19af3b54d22df49dfbb360afcd4d3c230f86ff8a
Diff style: patch stat
diff --git a/lib/find_resource.pl b/lib/find_resource.pl
index d90031f..be56ba0 100644
--- a/lib/find_resource.pl
+++ b/lib/find_resource.pl
@@ -6,6 +6,7 @@
 :- use_module(library(semweb/rdf_db)).
 :- use_module(library(semweb/rdfs)).
 :- use_module(library(semweb/rdf_litindex)).
+:- use_module(library(semweb/rdf_label)).
 
 % force indexing
 :- rdf_find_literals(zzz,_).
@@ -18,9 +19,17 @@
 %	Find a resource based on a Name.
 %
 %       Options
+%       * Tokenize
+%       Boolean, defaults to true.
+%
 %       * Match
-%	one of case, stem, prefix. Method to match the Name
-%	against the literal index. Default is case
+%	When Tokenize == true, Match should be one of case, stem,
+%	prefix. Method to match the Name against the literal index.
+%	Default is case. See rdf_find_literals/2 for details.
+%
+%	When Tokenize == false, Match should be one of the
+%	literal search methods supported by rdf/3, including prefix,
+%	plain, and exact, but _not_ stem or case.
 %
 %	* Distance
 %	Boolean, compute literal distance if true. Default is true. The
@@ -38,10 +47,13 @@ find_resource_by_name(Name, Hits, Options) :-
 	option(match(Match), Options, case),
 	option(distance(Distance), Options, true),
 	option(attributes(Attributes), Options, [P-0]),
-	catch(findall(D-Label, find_literal_distance(Distance, Name, Match, Label, D), Pairs),
+	option(tokenize(Tokenize), Options, true),
+	catch(findall(D-Label,
+		      find_literal_distance(Distance, Tokenize, Name, Match, Label, D),
+		      Pairs),
 	      no_stem(_),
 	      fail),
- 	findall(Hit, uri_with_label_in(Pairs, Attributes, Hit), Hits1),
+	findall(Hit, uri_with_label_in(Pairs, Attributes, Hit), Hits1),
 	sort(Hits1, Hits2),			% sort by URI
 	remove_dup_uris(Hits2, Hits3),	        % take lowest on URI
 	sort(Hits3, Hits).			% sort by distance
@@ -69,15 +81,15 @@ uri_with_label_in(LabelPairs, Attributes, hit(URI, Distance, P, Label)) :-
 
 
 		 /*******************************
-		 *	  find literals  	*
+		 *	  find literals		*
 		 *******************************/
 
-find_literal_distance(true, Tokens, MatchType, Label, Distance) :-
+find_literal_distance(true, Tokenize, Literal, MatchType, Label, Distance) :-
 	!,
-	find_literal(Tokens, MatchType, Label),
-	literal_distance(Tokens, Label, Distance).
-find_literal_distance(_, Tokens, MatchType, Label, 0) :-
-	find_literal(Tokens, MatchType, Label).
+	find_literal(Literal, Tokenize, MatchType, Label),
+	literal_distance(Literal, Label, Distance).
+find_literal_distance(_, Tokenize, Literal, MatchType, Label, 0) :-
+	find_literal(Literal, Tokenize, MatchType, Label).
 
 %%	find_literal(+TextOrTokens, +MatchType, -Literal)
 %
@@ -87,13 +99,21 @@ find_literal_distance(_, Tokens, MatchType, Label, 0) :-
 %	rdf_tokenize_literal/2.
 %	MatchType is one of case, stem or prefix.
 
-
 find_literal(Text, MatchType, Literal) :-
+	find_literal(Text, true, MatchType, Literal).
+
+find_literal(Text, true, MatchType, Literal) :-
 	tokens(Text, Tokens),
 	list_to_and(Tokens, MatchType, And),
 	rdf_find_literals(And, Literals),
 	member(Literal, Literals).
 
+find_literal(Text, false, MatchType, Result) :-
+	M =.. [ MatchType, Text ],
+	L =.. [ literal, M, Literal ],
+	rdf(_,_,L),
+	literal_text(Literal, Result).
+
 
 %%	list_to_and(+TextOrTokens, +MatchType, -AndTerm)
 %