xmlrdf/commit
added normalisation options to literal_to_id
author | Jacco van Ossenbruggen | Tue May 7 13:48:30 2013 +0200 |
---|---|---|
committer | Jacco van Ossenbruggen | Tue May 7 15:33:14 2013 +0200 |
commit | 9bc4c096260b6a3452d0da2afddbd15717bd313d | |
tree | 524ea1c76cae9c64cc9902f82c182afddbef09d4 | |
parent | 8b1beecd6d7fc17ef101c1fb3fa7d5e570150373 | |
Diff style: patch stat
```diff
diff --git a/lib/xmlrdf/rdf_convert_util.pl b/lib/xmlrdf/rdf_convert_util.pl
index 436b8d6..7a984dc 100644
--- a/lib/xmlrdf/rdf_convert_util.pl
+++ b/lib/xmlrdf/rdf_convert_util.pl
@@ -2,6 +2,7 @@
       [ rdf_literal/1,       % @Term
         type_time_literal/2, % +Literal, -TypedLiteral
         literal_to_id/3,     % +Literal, +NameSpace, -Id
+        literal_to_id/4,     % +Literal, +NameSpace, -Id, +Options
         name_to_id/3,        % +Literal, +NameSpace, -Id
         edm_identifier/4     % +URI, +Orig, -New, NewURI
       ]).
@@ -49,23 +50,30 @@ name_to_id(Literal, NS, ID) :-
 % literal_to_id([ParentLit, '-', Literal], NS, ID)
 % ==
 %
+% Options may include:
+%
+% * accents(normalize/disable), defaults to normalize
+% * underscores(normalize/disable), defaults to normalize
+%
 % @tbd Verify that the generated URI is unique!
 % @tbd Remove diacritics for non-iso-latin-1 text
 
-literal_to_id(Literals, NS, URI) :-
+literal_to_id(Literals, NS, URI, Options):-
     is_list(Literals), !,
-    maplist(literal_to_id, Literals, IDs),
+    maplist(literal_to_id_nn(Options), Literals, IDs),
     atomic_list_concat(IDs, ID),
     rdf_current_ns(NS, Prefix),
     atom_concat(Prefix, ID, URI).
-literal_to_id(Literal, NS, URI) :-
-    literal_to_id(Literal, ID),
+literal_to_id(Literal, NS, URI, Options) :-
+    literal_to_id_nn(Options, Literal, ID),
     rdf_current_ns(NS, Prefix),
     atom_concat(Prefix, ID, URI).
+literal_to_id(Literals, NS, URI) :-
+    literal_to_id(Literals, NS, URI, []).
 
-literal_to_id(Literal, ID) :-
+literal_to_id_nn(Options, Literal, ID) :-
     text_of_literal(Literal, Text),
-    text_to_id(Text, ID).
+    text_to_id(Text, ID, Options).
 
 text_of_literal(Var, _) :-
     var(Var), !,
@@ -77,11 +85,17 @@ text_of_literal(lang(_, Text), Text).
 text_of_literal(Text, Text) :-
     atomic(Text).
 
-text_to_id(Text, Id) :-
-    unaccent(Text, T1),
+text_to_id(Text, Id, Options) :-
+    (   option(accents(normalize), Options, normalize)
+    ->  unaccent(Text, T1)
+    ;   T1 = Text
+    ),
     atom_codes(T1, Codes),
     maplist(map_non_id_char, Codes, Codes1),
-    normalize_underscores(Codes1, Codes2),
+    (   option(underscores(normalize), Options, normalize)
+    ->  normalize_underscores(Codes1, Codes2)
+    ;   Codes2 = Codes1
+    ),
     atom_codes(Id, Codes2).
 
 :- if(exists_source(library(unicode))).
```