View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (C): 2008-2016, University of Amsterdam
    7                              VU University Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(pure_input,
   37          [ phrase_from_file/2,         % :Grammar, +File
   38            phrase_from_file/3,         % :Grammar, +File, +Options
   39            phrase_from_stream/2,       % :Grammar, +Stream
   40            stream_to_lazy_list/2,      % :Stream -List
   41
   42            syntax_error//1,            % +ErrorTerm
   43                                        % Low level interface
   44            lazy_list_location//1,      % -Location
   45            lazy_list_character_count//1 % -CharacterCount
   46          ]).   47:- autoload(library(error),[type_error/2,permission_error/3]).   48
   49
   50:- set_prolog_flag(generate_debug_info, false).   51
   52/** <module> Pure Input from files and streams
   53
   54This module is part of pio.pl,   dealing with _pure_ _input_: processing
   55input streams from the outside  world   using  pure  predicates, notably
   56grammar rules (DCG).  Using  pure   predicates  makes  non-deterministic
   57processing of input much simpler.
   58
   59Pure input uses attributed variables  to   read  input from the external
   60source into a list _|on demand|_. The   overhead of lazy reading is more
   61than compensated for by using block reads based on read_pending_codes/3.
   62
   63Ulrich Neumerkel came up with the idea to use coroutining for creating a
   64_lazy list_. His implementation  repositioned  the   file  to  deal with
   65re-reading  that  can  be  necessary    on   backtracking.  The  current
   66implementation uses destructive assignment together  with more low-level
   67attribute handling to realise pure input on any (buffered) stream.
   68
   69@tbd    Provide support for alternative input readers, e.g. reading
   70        terms, tokens, etc.
   71*/
   72
   73:- predicate_options(phrase_from_file/3, 3,
   74                     [ pass_to(system:open/4, 4)
   75                     ]).   76
   77%!  phrase_from_file(:Grammar, +File) is nondet.
   78%
   79%   Process the content of File  using   the  DCG  rule Grammar. The
   80%   space usage of this mechanism depends on   the length of the not
   81%   committed part of Grammar. Committed parts of the temporary list
   82%   are reclaimed by the  garbage  collector,   while  the  list  is
   83%   extended on demand due to  unification   of  the attributed tail
   84%   variable. Below is an example that counts  the number of times a
   85%   string appears in  a  file.   The  library  dcg/basics  provides
   86%   string//1 matching an arbitrary string   and  remainder//1 which
   87%   matches the remainder of the input without parsing.
   88%
   89%   ==
   90%   :- use_module(library(dcg/basics)).
   91%
   92%   file_contains(File, Pattern) :-
   93%           phrase_from_file(match(Pattern), File).
   94%
   95%   match(Pattern) -->
   96%           string(_),
   97%           string(Pattern),
   98%           remainder(_).
   99%
  100%   match_count(File, Pattern, Count) :-
  101%           aggregate_all(count, file_contains(File, Pattern), Count).
  102%   ==
  103%
  104%   This can be called as (note that   the  pattern must be a string
  105%   (code list)):
  106%
  107%   ==
  108%   ?- match_count('pure_input.pl', `file`, Count).
  109%   ==
  110
  111:- meta_predicate
  112    phrase_from_file(//, +),
  113    phrase_from_file(//, +, +),
  114    phrase_from_stream(//, +).  115
  116phrase_from_file(Grammar, File) :-
  117    phrase_from_file(Grammar, File, []).
  118
  119%!  phrase_from_file(:Grammar, +File, +Options) is nondet.
  120%
  121%   As phrase_from_file/2, providing additional Options. Options are
  122%   passed to open/4.
  123
  124phrase_from_file(Grammar, File, Options) :-
  125    setup_call_cleanup(
  126        open(File, read, In, Options),
  127        phrase_from_stream(Grammar, In),
  128        close(In)).
  129
  130%!  phrase_from_stream(:Grammar, +Stream)
  131%
  132%   Run Grammer against the character codes   on Stream. Stream must
  133%   be buffered.
  134
  135phrase_from_stream(Grammar, In) :-
  136    stream_to_lazy_list(In, List),
  137    phrase(Grammar, List).
  138
  139%!  syntax_error(+Error)//
  140%
  141%   Throw the syntax error Error  at   the  current  location of the
  142%   input. This predicate is designed to  be called from the handler
  143%   of phrase_from_file/3.
  144%
  145%   @throws error(syntax_error(Error), Location)
  146
  147syntax_error(Error) -->
  148    lazy_list_location(Location),
  149    { throw(error(syntax_error(Error), Location))
  150    }.
  151
  152%!  lazy_list_location(-Location)// is det.
  153%
  154%   Determine current (error) location in  a   lazy  list. True when
  155%   Location is an (error) location term that represents the current
  156%   location in the DCG list.
  157%
  158%   @arg    Location is a term file(Name, Line, LinePos, CharNo) or
  159%           stream(Stream, Line, LinePos, CharNo) if no file is
  160%           associated to the stream RestLazyList.  Finally, if the
  161%           Lazy list is fully materialized (ends in =|[]|=), Location
  162%           is unified with `end_of_file-CharCount`.
  163%   @see    lazy_list_character_count//1 only provides the character
  164%           count.
  165
  166lazy_list_location(Location, Here, Here) :-
  167    lazy_list_location(Here, Location).
  168
  169lazy_list_location(Here, Location) :-
  170    '$skip_list'(Skipped, Here, Tail),
  171    (   attvar(Tail)
  172    ->  get_attr(Tail, pure_input, State),
  173        State = lazy_input(Stream, PrevPos, Pos, _),
  174        Details = [Line, LinePos, CharNo],
  175        (   stream_property(Stream, file_name(File))
  176        ->  PosParts = [file, File|Details]
  177        ;   PosParts = [stream, Stream|Details]
  178        ),
  179        Location =.. PosParts,
  180        (   PrevPos == (-)                  % nothing is read.
  181        ->  Line = 1, LinePos = 0, CharNo = 0
  182        ;   stream_position_data(char_count, Pos, EndRecordCharNo),
  183            CharNo is EndRecordCharNo - Skipped,
  184            set_stream_position(Stream, PrevPos),
  185            stream_position_data(char_count, PrevPos, StartRecordCharNo),
  186            Skip is CharNo-StartRecordCharNo,
  187            forall(between(1, Skip, _), get_code(Stream, _)),
  188            stream_property(Stream, position(ErrorPos)),
  189            stream_position_data(line_count, ErrorPos, Line),
  190            stream_position_data(line_position, ErrorPos, LinePos)
  191        )
  192    ;   Tail == []
  193    ->  Location = end_of_file-Skipped
  194    ;   type_error(lazy_list, Here)
  195    ).
  196
  197
  198%!  lazy_list_character_count(-CharCount)//
  199%
  200%   True when CharCount is the current   character count in the Lazy
  201%   list. The character count is computed by finding the distance to
  202%   the next frozen tail of the lazy list. CharCount is one of:
  203%
  204%     - An integer
  205%     - A term end_of_file-Count
  206%
  207%   @see    lazy_list_location//1 provides full details of the location
  208%           for error reporting.
  209
  210lazy_list_character_count(Location, Here, Here) :-
  211    lazy_list_character_count(Here, Location).
  212
  213lazy_list_character_count(Here, CharNo) :-
  214    '$skip_list'(Skipped, Here, Tail),
  215    (   attvar(Tail)
  216    ->  get_attr(Tail, pure_input, State),
  217        arg(3, State, Pos),
  218        stream_position_data(char_count, Pos, EndRecordCharNo),
  219        CharNo is EndRecordCharNo - Skipped
  220    ;   Tail == []
  221    ->  CharNo = end_of_file-Skipped
  222    ;   type_error(lazy_list, Here)
  223    ).
  224
  225
  226%!  stream_to_lazy_list(+Stream, -List) is det.
  227%
  228%   Create a lazy list representing the   character codes in Stream.
  229%   List is a  partial  list  ending   in  an  attributed  variable.
  230%   Unifying this variable reads the next   block of data. The block
  231%   is stored with the attribute value such that there is no need to
  232%   re-read it.
  233%
  234%   @compat Unlike the previous version of this predicate this
  235%           version does not require a repositionable stream.  It
  236%           does require a buffer size of at least the maximum
  237%           number of bytes of a multi-byte sequence (6).
  238
  239stream_to_lazy_list(Stream, List) :-
  240    (   stream_property(Stream, buffer(false))
  241    ->  permission_error(create, lazy_list, Stream)
  242    ;   true
  243    ),
  244    stream_to_lazy_list(Stream, -, List).
  245
  246stream_to_lazy_list(Stream, PrevPos, List) :-
  247    stream_property(Stream, position(Pos)),
  248    put_attr(List, pure_input, lazy_input(Stream, PrevPos, Pos, _)).
  249
  250attr_unify_hook(State, Value) :-
  251    '$notrace'(attr_unify_hook_ndebug(State, Value)).
  252
  253attr_unify_hook_ndebug(State, Value) :-
  254    State = lazy_input(Stream, _PrevPos, Pos, Read),
  255    (   var(Read)
  256    ->  fill_buffer(Stream),
  257        read_pending_codes(Stream, NewList, Tail),
  258        (   Tail == []
  259        ->  nb_setarg(4, State, []),
  260            Value = []
  261        ;   stream_to_lazy_list(Stream, Pos, Tail),
  262            nb_linkarg(4, State, NewList),
  263            Value = NewList
  264        )
  265    ;   Value = Read
  266    )