View source with raw comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2012-2023, University of Amsterdam
    7                              VU University Amsterdam
    8                              SWI-Prolog Solutions b.v.
    9    All rights reserved.
   10
   11    Redistribution and use in source and binary forms, with or without
   12    modification, are permitted provided that the following conditions
   13    are met:
   14
   15    1. Redistributions of source code must retain the above copyright
   16       notice, this list of conditions and the following disclaimer.
   17
   18    2. Redistributions in binary form must reproduce the above copyright
   19       notice, this list of conditions and the following disclaimer in
   20       the documentation and/or other materials provided with the
   21       distribution.
   22
   23    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   24    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   25    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   26    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   27    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   28    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   29    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   30    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   31    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   33    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   34    POSSIBILITY OF SUCH DAMAGE.
   35*/
   36
   37:- module(dcg_basics,
   38          [ white//0,                   % <white inside line>
   39            whites//0,                  % <white inside line>*
   40            blank//0,                   % <blank>
   41            blanks//0,                  % <blank>*
   42            nonblank//1,                % <nonblank>
   43            nonblanks//1,               % <nonblank>* --> chars         (long)
   44            blanks_to_nl//0,            % [space,tab,ret]*nl
   45            string//1,                  % <any>* -->chars               (short)
   46            string_without//2,          % Exclude, -->chars             (long)
   47                                        % Characters
   48            alpha_to_lower//1,          % Get lower|upper, return lower
   49                                        % Decimal numbers
   50            digits//1,                  % [0-9]* -->chars
   51            digit//1,                   % [0-9] --> char
   52            integer//1,                 % [+-][0-9]+ --> integer
   53            float//1,                   % [+-]?[0-9]+(.[0-9]*)?(e[+-]?[0-9]+)? --> float
   54            number//1,                  % integer | float
   55                                        % Hexadecimal numbers
   56            xdigits//1,                 % [0-9A-Fa-f]* --> 0-15*
   57            xdigit//1,                  % [0-9A-Fa-f] --> 0-15
   58            xinteger//1,                % [0-9A-Fa-f]+ --> integer
   59
   60            prolog_var_name//1,         % Read a Prolog variable name
   61            csym//1,                    % Read a C symbol
   62
   63            eol//0,                     % End of line
   64            eos//0,                     % Test end of input.
   65            remainder//1,               % -List
   66
   67                                        % generation (TBD)
   68            atom//1                     % generate atom
   69          ]).   70:- use_module(library(lists)).   71:- use_module(library(error)).

Various general DCG utilities

This library provides various commonly used DCG primitives acting on list of character codes. Character classification is based on code_type/2.

This module started its life as library(http/dcg_basics) to support the HTTP protocol. Since then, it was increasingly used in code that has no relation to HTTP and therefore this library was moved to the core library.

To be done
- This is just a starting point. We need a comprehensive set of generally useful DCG primitives. */
 string_without(+EndCodes, -Codes)// is det
Take as many codes from the input until the next character code appears in the list EndCodes. The terminating code itself is left on the input. Typical use is to read upto a defined delimiter such as a newline or other reserved character. For example:
    ...,
    string_without("\n", RestOfLine)
Arguments:
EndCodes- is a list of character codes.
See also
- string//1.
  105string_without(End, Codes) -->
  106    { string(End),
  107      !,
  108      string_codes(End, EndCodes)
  109    },
  110    list_string_without(EndCodes, Codes).
  111string_without(End, Codes) -->
  112    list_string_without(End, Codes).
  113
  114list_string_without(Not, [C|T]) -->
  115    [C],
  116    { \+ memberchk(C, Not)
  117    },
  118    !,
  119    list_string_without(Not, T).
  120list_string_without(_, []) -->
  121    [].
 string(-Codes)// is nondet
Take as few as possible tokens from the input, taking one more each time on backtracking. This code is normally followed by a test for a delimiter. For example:
upto_colon(Atom) -->
        string(Codes), ":", !,
        { atom_codes(Atom, Codes) }.
See also
- string_without//2.
  137string([]) -->
  138    [].
  139string([H|T]) -->
  140    [H],
  141    string(T).
 blanks// is det
Skip zero or more white-space characters.
  147blanks -->
  148    blank,
  149    !,
  150    blanks.
  151blanks -->
  152    [].
 blank// is semidet
Take next space character from input. Space characters include newline.
See also
- white//0
  161blank -->
  162    [C],
  163    { nonvar(C),
  164      code_type(C, space)
  165    }.
 nonblanks(-Codes)// is det
Take all graph characters
  171nonblanks([H|T]) -->
  172    [H],
  173    { code_type(H, graph)
  174    },
  175    !,
  176    nonblanks(T).
  177nonblanks([]) -->
  178    [].
 nonblank(-Code)// is semidet
Code is the next non-blank (graph) character.
  184nonblank(H) -->
  185    [H],
  186    { code_type(H, graph)
  187    }.
 blanks_to_nl// is semidet
Take a sequence of blank//0 codes if blanks are followed by a newline or end of the input.
  194blanks_to_nl -->
  195    "\n",
  196    !.
  197blanks_to_nl -->
  198    blank,
  199    !,
  200    blanks_to_nl.
  201blanks_to_nl -->
  202    eos.
 whites// is det
Skip white space inside a line.
See also
- blanks//0 also skips newlines.
  210whites -->
  211    white,
  212    !,
  213    whites.
  214whites -->
  215    [].
 white// is semidet
Take next white character from input. White characters do not include newline.
  222white -->
  223    [C],
  224    { nonvar(C),
  225      code_type(C, white)
  226    }.
  227
  228
  229                 /*******************************
  230                 *       CHARACTER STUFF        *
  231                 *******************************/
 alpha_to_lower(?C)// is semidet
Read a letter (class alpha) and return it as a lowercase letter. If C is instantiated and the DCG list is already bound, C must be lower and matches both a lower and uppercase letter. If the output list is unbound, its first element is bound to C. For example:
?- alpha_to_lower(0'a, `AB`, R).
R = [66].
?- alpha_to_lower(C, `AB`, R).
C = 97, R = [66].
?- alpha_to_lower(0'a, L, R).
L = [97|R].
  250alpha_to_lower(L) -->
  251    [C],
  252    {   nonvar(C)
  253    ->  code_type(C, alpha),
  254        code_type(C, to_upper(L))
  255    ;   L = C
  256    }.
  257
  258
  259                 /*******************************
  260                 *            NUMBERS           *
  261                 *******************************/
 digits(?Chars)// is det
 digit(?Char)// is det
 integer(?Integer)// is det
Number processing. The predicate digits//1 matches a possibly empty set of digits, digit//1 processes a single digit and integer processes an optional sign followed by a non-empty sequence of digits into an integer.
  272digits([H|T]) -->
  273    digit(H),
  274    !,
  275    digits(T).
  276digits([]) -->
  277    [].
  278
  279digit(C) -->
  280    [C],
  281    { code_type(C, digit)
  282    }.
  283
  284integer(I, Head, Tail) :-
  285    nonvar(I),
  286    !,
  287    format(codes(Head, Tail), '~d', [I]).
  288integer(I) -->
  289    int_codes(Codes),
  290    { number_codes(I, Codes)
  291    }.
  292
  293int_codes([C,D0|D]) -->
  294    sign(C),
  295    !,
  296    digit(D0),
  297    digits(D).
  298int_codes([D0|D]) -->
  299    digit(D0),
  300    digits(D).
 float(?Float)// is det
Process a floating point number. The actual conversion is controlled by number_codes/2.
  308float(F, Head, Tail) :-
  309    float(F),
  310    !,
  311    with_output_to(codes(Head, Tail), write(F)).
  312float(F) -->
  313    number(F),
  314    { float(F) }.
 number(+Number)// is det
number(-Number)// is semidet
Generate extract a number. Handles both integers and floating point numbers.
  322number(N, Head, Tail) :-
  323    number(N),
  324    !,
  325    format(codes(Head, Tail), '~w', N).
  326number(N) -->
  327    { var(N)
  328    },
  329    !,
  330    int_codes(I),
  331    (   dot,
  332        digit(DF0),
  333        digits(DF)
  334    ->  {F = [0'., DF0|DF]}
  335    ;   {F = []}
  336    ),
  337    (   exp
  338    ->  int_codes(DI),
  339        {E=[0'e|DI]}
  340    ;   {E = []}
  341    ),
  342    { append([I, F, E], Codes),
  343      number_codes(N, Codes)
  344    }.
  345number(N) -->
  346    { type_error(number, N) }.
  347
  348sign(0'-) --> "-".
  349sign(0'+) --> "+".
  350
  351dot --> ".".
  352
  353exp --> "e".
  354exp --> "E".
  355
  356                 /*******************************
  357                 *          HEX NUMBERS         *
  358                 *******************************/
 xinteger(+Integer)// is det
xinteger(-Integer)// is semidet
Generate or extract an integer from a sequence of hexadecimal digits. Hexadecimal characters include both uppercase (A-F) and lowercase (a-f) letters. The value may be preceded by a sign (+/-)
  368xinteger(Val, Head, Tail) :-
  369    integer(Val),
  370    !,
  371    format(codes(Head, Tail), '~16r', [Val]).
  372xinteger(Val) -->
  373    sign(C),
  374    !,
  375    xdigit(D0),
  376    xdigits(D),
  377    { mkval([D0|D], 16, Val0),
  378      (   C == 0'-
  379      ->  Val is -Val0
  380      ;   Val = Val0
  381      )
  382    }.
  383xinteger(Val) -->
  384    xdigit(D0),
  385    xdigits(D),
  386    { mkval([D0|D], 16, Val)
  387    }.
 xdigit(-Weight)// is semidet
True if the next code is a hexdecimal digit with Weight. Weight is between 0 and 15. Hexadecimal characters include both uppercase (A-F) and lowercase (a-f) letters.
  395xdigit(D) -->
  396    [C],
  397    { code_type(C, xdigit(D))
  398    }.
 xdigits(-WeightList)// is det
List of weights of a sequence of hexadecimal codes. WeightList may be empty. Hexadecimal characters include both uppercase (A-F) and lowercase (a-f) letters.
  406xdigits([D0|D]) -->
  407    xdigit(D0),
  408    !,
  409    xdigits(D).
  410xdigits([]) -->
  411    [].
  412
  413mkval([W0|Weights], Base, Val) :-
  414    mkval(Weights, Base, W0, Val).
  415
  416mkval([], _, W, W).
  417mkval([H|T], Base, W0, W) :-
  418    W1 is W0*Base+H,
  419    mkval(T, Base, W1, W).
  420
  421
  422                 /*******************************
  423                 *         END-OF-STRING        *
  424                 *******************************/
 eol//
Matches end-of-line. Matching \r\n, \n or end of input (eos//0).
  430eol --> "\n", !.
  431eol --> "\r\n", !.
  432eol --> eos.
 eos//
Matches end-of-input. The implementation behaves as the following portable implementation:
eos --> call(eos_).
eos_([], []).
To be done
- This is a difficult concept and violates the context free property of DCGs. Explain the exact problems.
  447eos([], []).
 remainder(-List)//
Unify List with the remainder of the input.
  453remainder(List, List, []).
  454
  455
  456                 /*******************************
  457                 *         PROLOG SYNTAX                *
  458                 *******************************/
 prolog_var_name(-Name:atom)// is semidet
Matches a Prolog variable name. Primarily intended to deal with quasi quotations that embed Prolog variables.
  465prolog_var_name(Name) -->
  466    [C0], { code_type(C0, prolog_var_start) },
  467    !,
  468    prolog_id_cont(CL),
  469    { atom_codes(Name, [C0|CL]) }.
  470
  471prolog_id_cont([H|T]) -->
  472    [H], { code_type(H, prolog_identifier_continue) },
  473    !,
  474    prolog_id_cont(T).
  475prolog_id_cont([]) --> "".
  476
  477
  478                 /*******************************
  479                 *          IDENTIFIERS         *
  480                 *******************************/
 csym(?Symbol:atom)// is semidet
Recognise a C symbol according to the csymf and csym code type classification provided by the C library.
  487csym(Name, Head, Tail) :-
  488    nonvar(Name),
  489    format(codes(Head, Tail), '~w', [Name]).
  490csym(Name) -->
  491    [F], {code_type(F, csymf)},
  492    csyms(Rest),
  493    { atom_codes(Name, [F|Rest]) }.
  494
  495csyms([H|T]) -->
  496    [H], {code_type(H, csym)},
  497    !,
  498    csyms(T).
  499csyms([]) -->
  500    "".
  501
  502
  503                 /*******************************
  504                 *           GENERATION         *
  505                 *******************************/
 atom(++Atom)// is det
Generate codes of Atom. Current implementation uses write/1, dealing with any Prolog term. Atom must be ground though.
  512atom(Atom, Head, Tail) :-
  513    must_be(ground, Atom),
  514    format(codes(Head, Tail), '~w', [Atom])