View source with raw comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2007-2020, VU University Amsterdam
    7                              CWI, Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(rdf_cache,
   37          [ rdf_set_cache_options/1,    % +Options
   38            rdf_cache_file/3            % +URL, +RW, -File
   39          ]).   40:- autoload(library(error),[must_be/2,domain_error/2]).   41:- autoload(library(filesex),[make_directory_path/1]).

Cache RDF triples

The library library(semweb/rdf_cache) defines the caching strategy for triples sources. When using large RDF sources, caching triples greatly speedup loading RDF documents. The cache library implements two caching strategies that are controlled by rdf_set_cache_options/1.

Local caching This approach applies to files only. Triples are cached in a sub-directory of the directory holding the source. This directory is called .cache (_cache on Windows). If the cache option create_local_directory is true, a cache directory is created if posible.

Global caching This approach applies to all sources, except for unnamed streams. Triples are cached in directory defined by the cache option global_directory.

When loading an RDF file, the system scans the configured cache files unless cache(false) is specified as option to rdf_load/2 or caching is disabled. If caching is enabled but no cache exists, the system will try to create a cache file. First it will try to do this locally. On failure it will try to configured global cache. */

   67:- dynamic
   68    cache_option/1.   69
   70set_setfault_options :-
   71    assert(cache_option(enabled(true))),
   72    (   current_prolog_flag(windows, true)
   73    ->  assert(cache_option(local_directory('_cache')))
   74    ;   assert(cache_option(local_directory('.cache')))
   75    ).
   76
   77:- set_setfault_options.                % _only_ when loading!
 rdf_set_cache_options(+Options)
Change the cache policy. Provided options are:
   99rdf_set_cache_options([]) :- !.
  100rdf_set_cache_options([H|T]) :-
  101    !,
  102    rdf_set_cache_options(H),
  103    rdf_set_cache_options(T).
  104rdf_set_cache_options(Opt) :-
  105    functor(Opt, Name, Arity),
  106    arg(1, Opt, Value),
  107    (   cache_option(Name, Type)
  108    ->  must_be(Type, Value)
  109    ;   domain_error(cache_option, Opt)
  110    ),
  111    functor(Gen, Name, Arity),
  112    retractall(cache_option(Gen)),
  113    expand_option(Opt, EOpt),
  114    assert(cache_option(EOpt)).
  115
  116cache_option(enabled,                 boolean).
  117cache_option(local_directory,         atom).
  118cache_option(create_local_directory,  boolean).
  119cache_option(global_directory,        atom).
  120cache_option(create_global_directory, boolean).
  121
  122expand_option(global_directory(Local), global_directory(Global)) :-
  123    !,
  124    absolute_file_name(Local, Global).
  125expand_option(Opt, Opt).
 rdf_cache_file(+URL, +ReadWrite, -File) is semidet
File is the cache file for URL. If ReadWrite is read, it returns the name of an existing file. If write it returns where a new cache file can be overwritten or created.
  134rdf_cache_file(_URL, _, _File) :-
  135    cache_option(enabled(false)),
  136    !,
  137    fail.
  138rdf_cache_file(URL, read, File) :-
  139    !,
  140    (   atom_concat('file://', Path, URL),
  141        cache_option(local_directory(Local)),
  142        file_directory_name(Path, Dir),
  143        local_cache_file(URL, LocalFile),
  144        atomic_list_concat([Dir, Local, LocalFile], /, File)
  145    ;   cache_option(global_directory(Dir)),
  146        url_cache_file(URL, Dir, trp, read, File)
  147    ),
  148    access_file(File, read),
  149    !.
  150rdf_cache_file(URL, write, File) :-
  151    !,
  152    (   atom_concat('file://', Path, URL),
  153        cache_option(local_directory(Local)),
  154        file_directory_name(Path, Dir),
  155        (   cache_option(create_local_directory(true))
  156        ->  RWDir = write
  157        ;   RWDir = read
  158        ),
  159        ensure_dir(Dir, Local, RWDir, CacheDir),
  160        local_cache_file(URL, LocalFile),
  161        atomic_list_concat([CacheDir, LocalFile], /, File)
  162    ;   cache_option(global_directory(Dir)),
  163        ensure_global_cache(Dir),
  164        url_cache_file(URL, Dir, trp, write, File)
  165    ),
  166    access_file(File, write),
  167    !.
  168
  169
  170ensure_global_cache(Dir) :-
  171    exists_directory(Dir),
  172    !.
  173ensure_global_cache(Dir) :-
  174    cache_option(create_global_directory(true)),
  175    make_directory_path(Dir),
  176    print_message(informational, rdf(cache_created(Dir))).
  177
  178
  179                 /*******************************
  180                 *         LOCAL CACHE          *
  181                 *******************************/
 local_cache_file(+FileURL, -File) is det
Return the name of the cache file for FileURL. The name is the plain filename with the .trp extension. As the URL is a file URL, it is guaranteed to be a valid filename. Assumes the hosting OS can handle multiple exensions (.x.y) though. These days thats even true on Windows.
  191local_cache_file(URL, File) :-
  192    file_base_name(URL, Name),
  193    file_name_extension(Name, trp, File).
  194
  195
  196                 /*******************************
  197                 *         GLOBAL CACHE         *
  198                 *******************************/
 url_cache_file(+URL, +Dir, +Ext, +RW, -Path) is semidet
Determine location of cache-file for the given URL in Dir. If Ext is provided, the returned Path is ensured to have the specified extension.
Arguments:
RW- If read, no directories are created and the call fails if URL is not in the cache.
  209url_cache_file(URL, Dir, Ext, RW, Path) :-
  210    term_hash(URL, Hash0),
  211    Hash is Hash0 + 100000,         % make sure > 4 characters
  212    format(string(Hex), '~16r', [Hash]),
  213    sub_atom(Hex, _, 2, 0, L1),
  214    ensure_dir(Dir, L1, RW, Dir1),
  215    sub_atom(Hex, _, 2, 2, L2),
  216    ensure_dir(Dir1, L2, RW, Dir2),
  217    url_to_file(URL, File),
  218    ensure_ext(File, Ext, FileExt),
  219    atomic_list_concat([Dir2, /, FileExt], Path).
  220
  221ensure_dir(D0, Sub, RW, Dir) :-
  222    atomic_list_concat([D0, /, Sub], Dir),
  223    (   exists_directory(Dir)
  224    ->  true
  225    ;   RW == write
  226    ->  catch(make_directory(Dir), _, fail)
  227    ).
  228
  229ensure_ext(File, '', File) :- !.
  230ensure_ext(File, Ext, File) :-
  231    file_name_extension(_, Ext, File),
  232    !.
  233ensure_ext(File, Ext, FileExt) :-
  234    file_name_extension(File, Ext, FileExt).
 url_to_file(+URL, -File)
Convert a URL in something that fits in a file, i.e. avoiding / and :. We simply replace these by -. We could also use www_form_encode/2, but confusion when to replace as well as the fact that we loose the '.' (extension) makes this a less ideal choice. We could also consider base64 encoding of the name.
  244url_to_file(URL, File) :-
  245    atom_codes(URL, Codes),
  246    phrase(safe_file_name(Codes), FileCodes),
  247    atom_codes(File, FileCodes).
  248
  249safe_file_name([]) -->
  250    [].
  251safe_file_name([H|T]) -->
  252    replace(H),
  253    !,
  254    safe_file_name(T).
  255safe_file_name([H|T]) -->
  256    [H],
  257    safe_file_name(T).
 replace(+Code)//
Replace a character code that cannot safely be put in a filename. Should we use %XX?
  264replace(0'/)  --> "-".                  % directory separator
  265replace(0'\\) --> "-".                  % not allowed in Windows filename
  266replace(0':)  --> "-".                  % idem
  267replace(0'?)  --> "-".                  % idem
  268replace(0'*)  --> "-".                  % idem
  269
  270
  271                 /*******************************
  272                 *             MESSAGES         *
  273                 *******************************/
  274
  275:- multifile prolog:message/3.  276
  277prolog:message(rdf(cache_created(Dir))) -->
  278    [ 'Created RDF cache directory ~w'-[Dir] ]