View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2007-2020, VU University Amsterdam
    7                              CWI, Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(rdf_cache,
   37          [ rdf_set_cache_options/1,    % +Options
   38            rdf_cache_file/3            % +URL, +RW, -File
   39          ]).   40:- autoload(library(error),[must_be/2,domain_error/2]).   41:- autoload(library(filesex),[make_directory_path/1]).   42
   43/** <module> Cache RDF triples
   44
   45The library library(semweb/rdf_cache) defines the   caching strategy for
   46triples sources. When using large RDF   sources, caching triples greatly
   47speeds up loading RDF documents. The cache library implements two caching
   48strategies that are controlled by rdf_set_cache_options/1.
   49
   50*|Local caching|* This approach  applies  to   files  only.  Triples are
   51cached in a sub-directory of  the   directory  holding  the source. This
   52directory is called =|.cache|= (=|_cache|=  on   Windows).  If the cache
   53option =create_local_directory= is =true=, a  cache directory is created
   54if possible.
   55
   56*|Global caching|* This approach applies  to   all  sources,  except for
   57unnamed streams. Triples are cached in   directory  defined by the cache
   58option =global_directory=.
   59
   60When loading an RDF file, the system   scans  the configured cache files
   61unless cache(false) is specified as option   to rdf_load/2 or caching is
   62disabled. If caching is enabled but no cache exists, the system will try
   63to create a cache file. First it will try to do this locally. On failure
   64it will try the configured global cache.
   65*/
   66
   67:- dynamic
   68    cache_option/1.   69
   70set_setfault_options :-
   71    assert(cache_option(enabled(true))),
   72    (   current_prolog_flag(windows, true)
   73    ->  assert(cache_option(local_directory('_cache')))
   74    ;   assert(cache_option(local_directory('.cache')))
   75    ).
   76
   77:- set_setfault_options.                % _only_ when loading!
   78
   79%!  rdf_set_cache_options(+Options)
   80%
   81%   Change the cache policy.  Provided options are:
   82%
   83%     * enabled(Boolean)
   84%     If =true=, caching is enabled.
   85%
   86%     * local_directory(Name).
   87%     Plain name of local directory.  Default =|.cache|=
   88%     (=|_cache|= on Windows).
   89%
   90%     * create_local_directory(Bool)
   91%     If =true=, try to create local cache directories
   92%
   93%     * global_directory(Dir)
   94%     Writeable directory for storing cached parsed files.
   95%
   96%     * create_global_directory(Bool)
   97%     If =true=, try to create the global cache directory.
   98
   99rdf_set_cache_options([]) :- !.
  100rdf_set_cache_options([H|T]) :-
  101    !,
  102    rdf_set_cache_options(H),
  103    rdf_set_cache_options(T).
  104rdf_set_cache_options(Opt) :-
  105    functor(Opt, Name, Arity),
  106    arg(1, Opt, Value),
  107    (   cache_option(Name, Type)
  108    ->  must_be(Type, Value)
  109    ;   domain_error(cache_option, Opt)
  110    ),
  111    functor(Gen, Name, Arity),
  112    retractall(cache_option(Gen)),
  113    expand_option(Opt, EOpt),
  114    assert(cache_option(EOpt)).
  115
  116cache_option(enabled,                 boolean).
  117cache_option(local_directory,         atom).
  118cache_option(create_local_directory,  boolean).
  119cache_option(global_directory,        atom).
  120cache_option(create_global_directory, boolean).
  121
  122expand_option(global_directory(Local), global_directory(Global)) :-
  123    !,
  124    absolute_file_name(Local, Global).
  125expand_option(Opt, Opt).
  126
  127
  128%!  rdf_cache_file(+URL, +ReadWrite, -File) is semidet.
  129%
  130%   File is the cache file  for  URL.   If  ReadWrite  is =read=, it
  131%   returns the name of an existing file.  If =write= it returns
  132%   where a new cache file can be overwritten or created.
  133
  134rdf_cache_file(_URL, _, _File) :-
  135    cache_option(enabled(false)),
  136    !,
  137    fail.
  138rdf_cache_file(URL, read, File) :-
  139    !,
  140    (   atom_concat('file://', Path, URL),
  141        cache_option(local_directory(Local)),
  142        file_directory_name(Path, Dir),
  143        local_cache_file(URL, LocalFile),
  144        atomic_list_concat([Dir, Local, LocalFile], /, File)
  145    ;   cache_option(global_directory(Dir)),
  146        url_cache_file(URL, Dir, trp, read, File)
  147    ),
  148    access_file(File, read),
  149    !.
  150rdf_cache_file(URL, write, File) :-
  151    !,
  152    (   atom_concat('file://', Path, URL),
  153        cache_option(local_directory(Local)),
  154        file_directory_name(Path, Dir),
  155        (   cache_option(create_local_directory(true))
  156        ->  RWDir = write
  157        ;   RWDir = read
  158        ),
  159        ensure_dir(Dir, Local, RWDir, CacheDir),
  160        local_cache_file(URL, LocalFile),
  161        atomic_list_concat([CacheDir, LocalFile], /, File)
  162    ;   cache_option(global_directory(Dir)),
  163        ensure_global_cache(Dir),
  164        url_cache_file(URL, Dir, trp, write, File)
  165    ),
  166    access_file(File, write),
  167    !.
  168
  169
  170ensure_global_cache(Dir) :-
  171    exists_directory(Dir),
  172    !.
  173ensure_global_cache(Dir) :-
  174    cache_option(create_global_directory(true)),
  175    make_directory_path(Dir),
  176    print_message(informational, rdf(cache_created(Dir))).
  177
  178
  179                 /*******************************
  180                 *         LOCAL CACHE          *
  181                 *******************************/
  182
  183%!  local_cache_file(+FileURL, -File) is det.
  184%
  185%   Return the name of the cache file   for FileURL. The name is the
  186%   plain filename with the .trp extension.  As   the  URL is a file
  187%   URL, it is guaranteed  to  be   a  valid  filename.  Assumes the
  188%   hosting OS can handle  multiple   exensions  (=|.x.y|=)  though.
  189%   These days thats even true on Windows.
  190
  191local_cache_file(URL, File) :-
  192    file_base_name(URL, Name),
  193    file_name_extension(Name, trp, File).
  194
  195
  196                 /*******************************
  197                 *         GLOBAL CACHE         *
  198                 *******************************/
  199
  200%!  url_cache_file(+URL, +Dir, +Ext, +RW, -Path) is semidet.
  201%
  202%   Determine location of cache-file for the   given  URL in Dir. If
  203%   Ext is provided, the  returned  Path   is  ensured  to  have the
  204%   specified extension.
  205%
  206%   @param RW       If =read=, no directories are created and the call
  207%                   fails if URL is not in the cache.
  208
  209url_cache_file(URL, Dir, Ext, RW, Path) :-
  210    term_hash(URL, Hash0),
  211    Hash is Hash0 + 100000,         % make sure > 4 characters
  212    format(string(Hex), '~16r', [Hash]),
  213    sub_atom(Hex, _, 2, 0, L1),
  214    ensure_dir(Dir, L1, RW, Dir1),
  215    sub_atom(Hex, _, 2, 2, L2),
  216    ensure_dir(Dir1, L2, RW, Dir2),
  217    url_to_file(URL, File),
  218    ensure_ext(File, Ext, FileExt),
  219    atomic_list_concat([Dir2, /, FileExt], Path).
  220
  221ensure_dir(D0, Sub, RW, Dir) :-
  222    atomic_list_concat([D0, /, Sub], Dir),
  223    (   exists_directory(Dir)
  224    ->  true
  225    ;   RW == write
  226    ->  catch(make_directory(Dir), _, fail)
  227    ).
  228
  229ensure_ext(File, '', File) :- !.
  230ensure_ext(File, Ext, File) :-
  231    file_name_extension(_, Ext, File),
  232    !.
  233ensure_ext(File, Ext, FileExt) :-
  234    file_name_extension(File, Ext, FileExt).
  235
  236%!  url_to_file(+URL, -File)
  237%
  238%   Convert a URL in something that fits  in a file, i.e. avoiding /
  239%   and :. We  simply  replace  these  by   -.  We  could  also  use
  240%   www_form_encode/2, but confusion when to replace  as well as the
  241%   fact that we loose the '.' (extension)   makes this a less ideal
  242%   choice.  We could also consider base64 encoding of the name.
  243
  244url_to_file(URL, File) :-
  245    atom_codes(URL, Codes),
  246    phrase(safe_file_name(Codes), FileCodes),
  247    atom_codes(File, FileCodes).
  248
  249safe_file_name([]) -->
  250    [].
  251safe_file_name([H|T]) -->
  252    replace(H),
  253    !,
  254    safe_file_name(T).
  255safe_file_name([H|T]) -->
  256    [H],
  257    safe_file_name(T).
  258
  259%!  replace(+Code)//
  260%
  261%   Replace a character  code  that  cannot   safely  be  put  in  a
  262%   filename. Should we use %XX?
  263
  264replace(0'/)  --> "-".                  % directory separator
  265replace(0'\\) --> "-".                  % not allowed in Windows filename
  266replace(0':)  --> "-".                  % idem
  267replace(0'?)  --> "-".                  % idem
  268replace(0'*)  --> "-".                  % idem
  269
  270
  271                 /*******************************
  272                 *             MESSAGES         *
  273                 *******************************/
  274
  275:- multifile prolog:message/3.  276
  277prolog:message(rdf(cache_created(Dir))) -->
  278    [ 'Created RDF cache directory ~w'-[Dir] ]