1/* Part of ClioPatria SeRQL and SPARQL server 2 3 Author: Jan Wielemaker 4 E-mail: J.Wielemaker@vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 2011-2018, University of Amsterdam, 7 VU University Amsterdam 8 All rights reserved. 9 10 Redistribution and use in source and binary forms, with or without 11 modification, are permitted provided that the following conditions 12 are met: 13 14 1. Redistributions of source code must retain the above copyright 15 notice, this list of conditions and the following disclaimer. 16 17 2. Redistributions in binary form must reproduce the above copyright 18 notice, this list of conditions and the following disclaimer in 19 the documentation and/or other materials provided with the 20 distribution. 21 22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 32 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 POSSIBILITY OF SUCH DAMAGE. 34*/ 35 36:- module(rdf_file_type, 37 [ rdf_guess_data_format/2, % +Stream, ?Format 38 rdf_guess_format_and_load/2 % +Stream, +Options 39 ]). 40:- use_module(library(semweb/rdf_db)). 41:- use_module(library(memfile)). 42:- use_module(library(sgml)). 43:- use_module(library(lists)). 44:- use_module(library(apply)). 45:- use_module(library(option)). 46:- if(exists_source(library(archive))). 47:- use_module(library(archive)). 48:- endif.
64rdf_guess_format_and_load(Stream, Options) :- 65 option(format(_), Options), 66 !, 67 rdf_load(stream(Stream), Options). 68:- if(current_predicate(archive_data_stream/3)). 69rdf_guess_format_and_load(Stream, Options) :- 70 setup_call_cleanup( 71 archive_open(Stream, Archive, [format(all),format(raw)]), 72 forall(archive_data_stream(Archive, DataStream, [meta_data(MetaData)]), 73 call_cleanup( 74 ( member_base_uri(MetaData, Options, Options2), 75 option(base_uri(Base), Options2, 'http://example.org/'), 76 set_stream(DataStream, file_name(Base)), 77 ( file_base_name(Base, FileName), 78 non_rdf_file(FileName) 79 -> true 80 ; rdf_guess_data_format(DataStream, Format) 81 -> rdf_load(stream(DataStream), [format(Format)|Options2]) 82 ; true 83 ) 84 ), 85 close(DataStream))), 86 archive_close(Archive)). 87 88member_base_uri([_], Options, Options) :- !. 89member_base_uri(MetaData, Options0, Options) :- 90 append(MetaPath, [_], MetaData), 91 maplist(get_dict(name), MetaPath, MetaSegments), 92 select_option(base_uri(Base0), Options0, Options1, 'http://archive.org'), 93 atomic_list_concat([Base0|MetaSegments], /, Base), 94 Options = [base_uri(Base)|Options1]. 95:- else. 96rdf_guess_format_and_load(Stream, Options) :- 97 rdf_guess_data_format(Stream, Format), 98 rdf_load(stream(Stream), [format(Format)|Options]). 99:- endif. 100 101non_rdf_file(File) :- 102 file_name_extension(Base, Ext, File), 103 ( non_rdf_ext(Ext) 104 -> true 105 ; downcase_atom(Base, Lower), 106 non_rdf_base(Lower) 107 ). 108 109non_rdf_ext(pdf). 110non_rdf_ext(txt). 111non_rdf_ext(md). 112non_rdf_ext(doc). 113 114non_rdf_base(readme). 115non_rdf_base(todo).
126rdf_guess_data_format(_, Format) :- 127 nonvar(Format), 128 !. 129rdf_guess_data_format(Stream, xml) :- 130 xml_doctype(Stream, _), 131 !. 132rdf_guess_data_format(Stream, Format) :- 133 stream_property(Stream, file_name(File)), 134 file_name_extension(_, Ext, File), 135 rdf_db:rdf_file_type(Ext, Format), 136 !. 137rdf_guess_data_format(_, turtle).
Note that it is not possible to define valid RDF/XML without namespaces, while it is not possible to define a valid absolute Turtle URI (using <URI>) with a valid xmlns declaration.
154:- if(current_predicate(peek_string/3)). 155xml_doctype(Stream, DocType) :- 156 peek_string(Stream, 4096, Start), 157 setup_call_cleanup( 158 open_string_stream(Start, In), 159 xml_doctype_2(In, DocType), 160 close(In)). 161:- else. 162xml_doctype(Stream, DocType) :- 163 xml_doctype_2(Stream, DocType). 164:- endif. 165 166xml_doctype_2(Stream, DocType) :- 167 catch(setup_call_cleanup(make_parser(Stream, Parser, State), 168 sgml_parse(Parser, 169 [ source(Stream), 170 max_errors(1), 171 syntax_errors(quiet), 172 call(begin, on_begin), 173 call(cdata, on_cdata) 174 ]), 175 cleanup_parser(Stream, Parser, State)), 176 E, true), 177 nonvar(E), 178 E = tag(DocType). 179 180make_parser(Stream, Parser, state(Pos)) :- 181 stream_property(Stream, position(Pos)), 182 new_sgml_parser(Parser, []), 183 set_sgml_parser(Parser, dialect(xmlns)). 184 185cleanup_parser(Stream, Parser, state(Pos)) :- 186 free_sgml_parser(Parser), 187 set_stream_position(Stream, Pos). 188 189on_begin(Tag, Attributes, _Parser) :- 190 memberchk(xmlns:_=_, Attributes), 191 throw(tag(Tag)). 192 193on_cdata(_CDATA, _Parser) :- 194 throw(error(cdata)). 195 196 197open_string_stream(String, Stream) :- 198 new_memory_file(MF), 199 setup_call_cleanup( 200 open_memory_file(MF, write, Out), 201 format(Out, '~s', [String]), 202 close(Out)), 203 open_memory_file(MF, read, Stream, 204 [ free_on_close(true) 205 ])
Load RDF data from unknown file-type
*/