View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2002-2023, University of Amsterdam
    7                              VU University Amsterdam
    8                              CWI, Amsterdam
    9                              SWI-Prolog Solutions b.v.
   10    All rights reserved.
   11
   12    Redistribution and use in source and binary forms, with or without
   13    modification, are permitted provided that the following conditions
   14    are met:
   15
   16    1. Redistributions of source code must retain the above copyright
   17       notice, this list of conditions and the following disclaimer.
   18
   19    2. Redistributions in binary form must reproduce the above copyright
   20       notice, this list of conditions and the following disclaimer in
   21       the documentation and/or other materials provided with the
   22       distribution.
   23
   24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   25    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   26    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   27    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   28    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   29    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   30    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   31    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   32    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   33    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   34    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   35    POSSIBILITY OF SUCH DAMAGE.
   36*/
   37
   38:- module(http_open,
   39          [ http_open/3,                % +URL, -Stream, +Options
   40            http_set_authorization/2,   % +URL, +Authorization
   41            http_close_keep_alive/1     % +Address
   42          ]).   43:- autoload(library(aggregate),[aggregate_all/3]).   44:- autoload(library(apply),[foldl/4,include/3]).   45:- autoload(library(base64),[base64/3]).   46:- use_module(library(debug),[debug/3,debugging/1]).   47:- autoload(library(error),
   48	    [ domain_error/2, must_be/2, existence_error/2, instantiation_error/1
   49	    ]).   50:- autoload(library(lists),[last/2,member/2]).   51:- autoload(library(option),
   52	    [ meta_options/3, option/2, select_option/4, merge_options/3,
   53	      option/3, select_option/3
   54	    ]).   55:- autoload(library(readutil),[read_line_to_codes/2]).   56:- autoload(library(uri),
   57	    [ uri_resolve/3, uri_components/2, uri_data/3,
   58              uri_authority_components/2, uri_authority_data/3,
   59	      uri_encoded/3, uri_query_components/2, uri_is_global/1
   60	    ]).   61:- autoload(library(http/http_header),
   62            [ http_parse_header/2, http_post_data/3 ]).   63:- autoload(library(http/http_stream),[stream_range_open/3]).   64:- if(exists_source(library(ssl))).   65:- autoload(library(ssl), [ssl_upgrade_legacy_options/2]).   66:- endif.   67:- use_module(library(socket)).   68
   69
   70/** <module> HTTP client library
   71
   72This library defines http_open/3, which opens an URL as a Prolog stream.
   73The functionality of the  library  can   be  extended  by  loading two
   74additional modules that act as plugins:
   75
   76    * library(http/http_ssl_plugin)
   77    Loading this library causes http_open/3 to handle HTTPS connections.
   78    Relevant options for SSL certificate handling are handed to
   79    ssl_context/3. This plugin is loaded automatically if the scheme
   80    `https` is requested using a default SSL context. See the plugin for
   81    additional information regarding security.
   82
   83    * library(zlib)
   84    Loading this library supports the `gzip` transfer encoding.  This
   85    plugin is lazily loaded if a connection is opened that claims this
   86    transfer encoding.
   87
   88    * library(http/http_cookie)
   89    Loading this library adds tracking cookies to http_open/3. Returned
   90    cookies are collected in the Prolog database and supplied for
   91    subsequent requests.
   92
   93    * library(http/http_stream)
   94    This library adds support for _chunked_ encoding. It is lazily
   95    loaded if the server sends a ``Transfer-encoding: chunked`` header.
   96
   97
   98Here is a simple example to fetch a web-page:
   99
  100```
  101?- http_open('http://www.google.com/search?q=prolog', In, []),
  102   copy_stream_data(In, user_output),
  103   close(In).
  104<!doctype html><head><title>prolog - Google Search</title><script>
  105...
  106```
  107
  108The example below fetches the modification time of a web-page. Note that
  109=|Modified|= is =|''|= (the empty atom) if the  web-server does not provide a
  110time-stamp for the resource. See also parse_time/2.
  111
  112```
  113modified(URL, Stamp) :-
  114       http_open(URL, In,
  115                 [ method(head),
  116                   header(last_modified, Modified)
  117                 ]),
  118       close(In),
  119       Modified \== '',
  120       parse_time(Modified, Stamp).
  121```
  122
  123Then next example uses Google search. It exploits library(uri) to manage
  124URIs, library(sgml) to load  an  HTML   document  and  library(xpath) to
  125navigate the parsed HTML. Note that  you   may  need to adjust the XPath
  126queries if the data returned by Google changes (this example indeed
  127no longer works and currently fails at the first xpath/3 call)
  128
  129```
  130:- use_module(library(http/http_open)).
  131:- use_module(library(xpath)).
  132:- use_module(library(sgml)).
  133:- use_module(library(uri)).
  134
  135google(For, Title, HREF) :-
  136        uri_encoded(query_value, For, Encoded),
  137        atom_concat('http://www.google.com/search?q=', Encoded, URL),
  138        http_open(URL, In, []),
  139        call_cleanup(
  140            load_html(In, DOM, []),
  141            close(In)),
  142        xpath(DOM, //h3(@class=r), Result),
  143        xpath(Result, //a(@href=HREF0, text), Title),
  144        uri_components(HREF0, Components),
  145        uri_data(search, Components, Query),
  146        uri_query_components(Query, Parts),
  147        memberchk(q=HREF, Parts).
  148```
  149
  150An example query is below:
  151
  152```
  153?- google(prolog, Title, HREF).
  154Title = 'SWI-Prolog',
  155HREF = 'http://www.swi-prolog.org/' ;
  156Title = 'Prolog - Wikipedia',
  157HREF = 'https://nl.wikipedia.org/wiki/Prolog' ;
  158Title = 'Prolog - Wikipedia, the free encyclopedia',
  159HREF = 'https://en.wikipedia.org/wiki/Prolog' ;
  160Title = 'Pro-Log is logistiek dienstverlener m.b.t. vervoer over water.',
  161HREF = 'http://www.pro-log.nl/' ;
  162Title = 'Learn Prolog Now!',
  163HREF = 'http://www.learnprolognow.org/' ;
  164Title = 'Free Online Version - Learn Prolog
  165...
  166```
  167
  168@see load_html/3 and xpath/3 can be used to parse and navigate HTML
  169     documents.
  170@see http_get/3 and http_post/4 provide an alternative interface that
  171     convert the reply depending on the =|Content-Type|= header.
  172*/
  173
  174:- multifile
  175    http:encoding_filter/3,           % +Encoding, +In0, -In
  176    http:current_transfer_encoding/1, % ?Encoding
  177    http:disable_encoding_filter/1,   % +ContentType
  178    http:http_protocol_hook/5,        % +Protocol, +Parts, +StreamPair,
  179                                      % -NewStreamPair, +Options
  180    http:open_options/2,              % +Parts, -Options
  181    http:write_cookies/3,             % +Out, +Parts, +Options
  182    http:update_cookies/3,            % +CookieLine, +Parts, +Options
  183    http:authenticate_client/2,       % +URL, +Action
  184    http:http_connection_over_proxy/6.  185
  186:- meta_predicate
  187    http_open(+,-,:).  188
  189:- predicate_options(http_open/3, 3,
  190                     [ authorization(compound),
  191                       final_url(-atom),
  192                       header(+atom, -atom),
  193                       headers(-list),
  194                       raw_headers(-list(string)),
  195                       connection(+atom),
  196                       method(oneof([delete,get,put,purge,head,
  197                                     post,patch,options])),
  198                       size(-integer),
  199                       status_code(-integer),
  200                       output(-stream),
  201                       timeout(number),
  202                       unix_socket(+atom),
  203                       proxy(atom, integer),
  204                       proxy_authorization(compound),
  205                       bypass_proxy(boolean),
  206                       request_header(any),
  207                       user_agent(atom),
  208                       version(-compound),
  209        % The option below applies if library(http/http_header) is loaded
  210                       post(any),
  211        % The options below apply if library(http/http_ssl_plugin)) is loaded
  212                       pem_password_hook(callable),
  213                       cacert_file(atom),
  214                       cert_verify_hook(callable)
  215                     ]).  216
  217%!  user_agent(-Agent) is det.
  218%
  219%   Default value for =|User-Agent|=,  can   be  overruled using the
  220%   option user_agent(Agent) of http_open/3.
  221
  222user_agent('SWI-Prolog').
  223
  224%!  http_open(+URL, -Stream, +Options) is det.
  225%
  226%   Open the data at the HTTP  server   as  a  Prolog stream. URL is
  227%   either an atom  specifying  a  URL   or  a  list  representing a
  228%   broken-down  URL  as  specified  below.   After  this  predicate
  229%   succeeds the data can be read from Stream. After completion this
  230%   stream must be  closed  using   the  built-in  Prolog  predicate
  231%   close/1. Options provides additional options:
  232%
  233%     * authenticate(+Boolean)
  234%     If `false` (default `true`), do _not_ try to automatically
  235%     authenticate the client if a 401 (Unauthorized) status code
  236%     is received.
  237%
  238%     * authorization(+Term)
  239%     Send authorization. See also http_set_authorization/2. Supported
  240%     schemes:
  241%
  242%       - basic(+User, +Password)
  243%       HTTP Basic authentication.
  244%       - bearer(+Token)
  245%       HTTP Bearer authentication.
  246%       - digest(+User, +Password)
  247%       HTTP Digest authentication.  This option is only provided
  248%       if the plugin library(http/http_digest) is also loaded.
  249%
  250%     * unix_socket(+Path)
  251%     Connect to the given Unix domain socket.  In this scenario
  252%     the host name and port or ignored.  If the server replies
  253%     with a _redirect_ message and the host differs from the
  254%     original host as normal TCP connection is used to handle
  255%     the redirect.  This option is inspired by curl(1)'s option
  256%     `--unix-socket`.
  257%
  258%     * connection(+Connection)
  259%     Specify the =Connection= header.  Default is =close=.  The
  260%     alternative is =|Keep-alive|=.  This maintains a pool of
  261%     available connections as determined by keep_connection/1.
  262%     The library(http/websockets) uses =|Keep-alive, Upgrade|=.
  263%     Keep-alive connections can be closed explicitly using
  264%     http_close_keep_alive/1. Keep-alive connections may
  265%     significantly improve repetitive requests on the same server,
  266%     especially if the IP route is long, HTTPS is used or the
  267%     connection uses a proxy.
  268%
  269%     * final_url(-FinalURL)
  270%     Unify FinalURL with the final   destination. This differs from
  271%     the  original  URL  if  the  returned  head  of  the  original
  272%     indicates an HTTP redirect (codes 301,  302 or 303). Without a
  273%     redirect, FinalURL is the same as URL if  URL is an atom, or a
  274%     URL constructed from the parts.
  275%
  276%     * header(Name, -AtomValue)
  277%     If provided, AtomValue is  unified  with   the  value  of  the
  278%     indicated  field  in  the  reply    header.  Name  is  matched
  279%     case-insensitive and the underscore  (_)   matches  the hyphen
  280%     (-). Multiple of these options  may   be  provided  to extract
  281%     multiple  header  fields.  If  the  header  is  not  available
  282%     AtomValue is unified to the empty atom ('').
  283%
  284%     * headers(-List)
  285%     If provided,  List is unified  with a list of  Name(Value) pairs
  286%     corresponding to  fields in  the reply  header.  Name  and Value
  287%     follow  the  same  conventions used  by  the  header(Name,Value)
  288%     option.  A  pseudo header status_code(Code) is  added to provide
  289%     the  HTTP status  as  an integer.   See also  raw_headers(-List)
  290%     which  provides  the  entire   HTTP  reply  header  in  unparsed
  291%     representation.
  292%
  293%     * method(+Method)
  294%     One of =get= (default), =head=, =delete=, =post=,   =put=   or
  295%     =patch=.
  296%     The  =head= message can be
  297%     used in combination with  the   header(Name,  Value) option to
  298%     access information on the resource   without actually fetching
  299%     the resource itself.  The  returned   stream  must  be  closed
  300%     immediately.
  301%
  302%     If post(Data) is provided, the default is =post=.
  303%
  304%     * size(-Size)
  305%     Size is unified with the   integer value of =|Content-Length|=
  306%     in the reply header.
  307%
  308%     * version(-Version)
  309%     Version is a _pair_ `Major-Minor`, where `Major` and `Minor`
  310%     are integers representing the HTTP version in the reply header.
  311%
  312%     * range(+Range)
  313%     Ask for partial content. Range   is  a term _|Unit(From,To)|_,
  314%     where `From` is an integer and `To`   is  either an integer or
  315%     the atom `end`. HTTP 1.1 only   supports Unit = `bytes`. E.g.,
  316%     to   ask   for    bytes    1000-1999,     use    the    option
  317%     range(bytes(1000,1999))
  318%
  319%     * raw_encoding(+Encoding)
  320%     Do not install a decoding filter for Encoding.  For example,
  321%     using raw_encoding('applocation/gzip') the system will not
  322%     decompress the stream if it is compressed using `gzip`.
  323%
  324%     * raw_headers(-Lines)
  325%     Unify Lines with a list of strings that represents the complete
  326%     reply header returned by the server.  See also headers(-List).
  327%
  328%     * redirect(+Boolean)
  329%     If `false` (default `true`), do _not_ automatically redirect
  330%     if a 3XX code is received.  Must be combined with
  331%     status_code(Code) and one of the header options to read the
  332%     redirect reply. In particular, without status_code(Code) a
  333%     redirect is mapped to an exception.
  334%
  335%     * status_code(-Code)
  336%     If this option is  present  and   Code  unifies  with the HTTP
  337%     status code, do *not* translate errors (4xx, 5xx) into an
  338%     exception. Instead, http_open/3 behaves as if 2xx (success) is
  339%     returned, providing the application to read the error document
  340%     from the returned stream.
  341%
  342%     * output(-Out)
  343%     Unify the output stream with Out and do not close it. This can
  344%     be used to upgrade a connection.
  345%
  346%     * timeout(+Timeout)
  347%     If provided, set a timeout on   the stream using set_stream/2.
  348%     With this option if no new data arrives within Timeout seconds
  349%     the stream raises an exception.  Default   is  to wait forever
  350%     (=infinite=).
  351%
  352%     * post(+Data)
  353%     Issue a =POST= request on the HTTP server.  Data is
  354%     handed to http_post_data/3.
  355%
  356%     * proxy(+Host:Port)
  357%     Use an HTTP proxy to connect to the outside world.  See also
  358%     socket:proxy_for_url/3.  This option overrules the proxy
  359%     specification defined by socket:proxy_for_url/3.
  360%
  361%     * proxy(+Host, +Port)
  362%     Synonym for proxy(+Host:Port).  Deprecated.
  363%
  364%     * proxy_authorization(+Authorization)
  365%     Send authorization to the proxy.  Otherwise   the  same as the
  366%     =authorization= option.
  367%
  368%     * bypass_proxy(+Boolean)
  369%     If =true=, bypass proxy hooks.  Default is =false=.
  370%
  371%     * request_header(Name = Value)
  372%     Additional  name-value  parts  are  added   in  the  order  of
  373%     appearance to the HTTP request   header.  No interpretation is
  374%     done.
  375%
  376%     * max_redirect(+Max)
  377%     Sets the maximum length of a redirection chain.  This is needed
  378%     for some IRIs that redirect indefinitely to other IRIs without
  379%     looping (e.g., redirecting to IRIs with a random element in them).
  380%     Max must be either a non-negative integer or the atom `infinite`.
  381%     The default value is `10`.
  382%
  383%     * user_agent(+Agent)
  384%     Defines the value of the  =|User-Agent|=   field  of  the HTTP
  385%     header. Default is =SWI-Prolog=.
  386%
  387%   The hook http:open_options/2 can  be   used  to  provide default
  388%   options   based   on   the   broken-down     URL.   The   option
  389%   status_code(-Code)  is  particularly  useful   to  query  *REST*
  390%   interfaces that commonly return status   codes  other than `200`
  391%   that need to be be processed by the client code.
  392%
  393%   @param URL is either an atom or string (url) or a list of _parts_.
  394%
  395%               When provided, this list may contain the fields
  396%               =scheme=, =user=, =password=, =host=, =port=, =path=
  397%               and either =query_string= (whose argument is an atom)
  398%               or =search= (whose argument is a list of
  399%               =|Name(Value)|= or =|Name=Value|= compound terms).
  400%               Only =host= is mandatory.  The example below opens the
  401%               URL =|http://www.example.com/my/path?q=Hello%20World&lang=en|=.
  402%               Note that values must *not* be quoted because the
  403%               library inserts the required quotes.
  404%
  405%               ```
  406%               http_open([ host('www.example.com'),
  407%                           path('/my/path'),
  408%                           search([ q='Hello world',
  409%                                    lang=en
  410%                                  ])
  411%                         ])
  412%               ```
  413%
  414%   @throws error(existence_error(url, Id),Context) is raised if the
  415%   HTTP result code is not in the range 200..299. Context has the
  416%   shape context(Message, status(Code, TextCode)), where `Code` is the
  417%   numeric HTTP code and `TextCode` is the textual description thereof
  418%   provided by the server. `Message` may provide additional details or
  419%   may be unbound.
  420%
  421%   @see ssl_context/3 for SSL related options if
  422%   library(http/http_ssl_plugin) is loaded.
  423
  424:- multifile
  425    socket:proxy_for_url/3.           % +URL, +Host, -ProxyList
  426
  427http_open(URL, Stream, QOptions) :-
  428    meta_options(is_meta, QOptions, Options0),
  429    (   atomic(URL)
  430    ->  parse_url_ex(URL, Parts)
  431    ;   Parts = URL
  432    ),
  433    autoload_https(Parts),
  434    upgrade_ssl_options(Parts, Options0, Options),
  435    add_authorization(Parts, Options, Options1),
  436    findall(HostOptions, hooked_options(Parts, HostOptions), AllHostOptions),
  437    foldl(merge_options_rev, AllHostOptions, Options1, Options2),
  438    (   option(bypass_proxy(true), Options)
  439    ->  try_http_proxy(direct, Parts, Stream, Options2)
  440    ;   term_variables(Options2, Vars2),
  441        findall(Result-Vars2,
  442                try_a_proxy(Parts, Result, Options2),
  443                ResultList),
  444        last(ResultList, Status-Vars2)
  445    ->  (   Status = true(_Proxy, Stream)
  446        ->  true
  447        ;   throw(error(proxy_error(tried(ResultList)), _))
  448        )
  449    ;   try_http_proxy(direct, Parts, Stream, Options2)
  450    ).
  451
  452try_a_proxy(Parts, Result, Options) :-
  453    parts_uri(Parts, AtomicURL),
  454    option(host(Host), Parts),
  455    (   option(unix_socket(Path), Options)
  456    ->  Proxy = unix_socket(Path)
  457    ;   (   option(proxy(ProxyHost:ProxyPort), Options)
  458        ;   is_list(Options),
  459            memberchk(proxy(ProxyHost,ProxyPort), Options)
  460        )
  461    ->  Proxy = proxy(ProxyHost, ProxyPort)
  462    ;   socket:proxy_for_url(AtomicURL, Host, Proxy)
  463    ),
  464    debug(http(proxy),
  465          'http_open: Connecting via ~w to ~w', [Proxy, AtomicURL]),
  466    (   catch(try_http_proxy(Proxy, Parts, Stream, Options), E, true)
  467    ->  (   var(E)
  468        ->  !, Result = true(Proxy, Stream)
  469        ;   Result = error(Proxy, E)
  470        )
  471    ;   Result = false(Proxy)
  472    ),
  473    debug(http(proxy), 'http_open: ~w: ~p', [Proxy, Result]).
  474
  475try_http_proxy(Method, Parts, Stream, Options0) :-
  476    option(host(Host), Parts),
  477    proxy_request_uri(Method, Parts, RequestURI),
  478    select_option(visited(Visited0), Options0, OptionsV, []),
  479    Options = [visited([Parts|Visited0])|OptionsV],
  480    parts_scheme(Parts, Scheme),
  481    default_port(Scheme, DefPort),
  482    url_part(port(Port), Parts, DefPort),
  483    host_and_port(Host, DefPort, Port, HostPort),
  484    (   option(connection(Connection), Options0),
  485        keep_alive(Connection),
  486        get_from_pool(Host:Port, StreamPair),
  487        debug(http(connection), 'Trying Keep-alive to ~p using ~p',
  488              [ Host:Port, StreamPair ]),
  489        catch(send_rec_header(StreamPair, Stream, HostPort,
  490                              RequestURI, Parts, Options),
  491              Error,
  492              keep_alive_error(Error, StreamPair))
  493    ->  true
  494    ;   http:http_connection_over_proxy(Method, Parts, Host:Port,
  495                                        SocketStreamPair, Options, Options1),
  496        (   catch(http:http_protocol_hook(Scheme, Parts,
  497                                          SocketStreamPair,
  498                                          StreamPair, Options),
  499                  Error,
  500                  ( close(SocketStreamPair, [force(true)]),
  501                    throw(Error)))
  502        ->  true
  503        ;   StreamPair = SocketStreamPair
  504        ),
  505        send_rec_header(StreamPair, Stream, HostPort,
  506                        RequestURI, Parts, Options1)
  507    ),
  508    return_final_url(Options).
  509
  510proxy_request_uri(direct, Parts, RequestURI) :-
  511    !,
  512    parts_request_uri(Parts, RequestURI).
  513proxy_request_uri(unix_socket(_), Parts, RequestURI) :-
  514    !,
  515    parts_request_uri(Parts, RequestURI).
  516proxy_request_uri(_, Parts, RequestURI) :-
  517    parts_uri(Parts, RequestURI).
  518
  519http:http_connection_over_proxy(unix_socket(Path), _, _,
  520                                StreamPair, Options, Options) :-
  521    !,
  522    unix_domain_socket(Socket),
  523    tcp_connect(Socket, Path),
  524    tcp_open_socket(Socket, In, Out),
  525    stream_pair(StreamPair, In, Out).
  526http:http_connection_over_proxy(direct, _, Host:Port,
  527                                StreamPair, Options, Options) :-
  528    !,
  529    open_socket(Host:Port, StreamPair, Options).
  530http:http_connection_over_proxy(proxy(ProxyHost, ProxyPort), Parts, _,
  531                                StreamPair, Options, Options) :-
  532    \+ ( memberchk(scheme(Scheme), Parts),
  533         secure_scheme(Scheme)
  534       ),
  535    !,
  536    % We do not want any /more/ proxy after this
  537    open_socket(ProxyHost:ProxyPort, StreamPair,
  538                [bypass_proxy(true)|Options]).
  539http:http_connection_over_proxy(socks(SocksHost, SocksPort), _Parts, Host:Port,
  540                                StreamPair, Options, Options) :-
  541    !,
  542    tcp_connect(SocksHost:SocksPort, StreamPair, [bypass_proxy(true)]),
  543    catch(negotiate_socks_connection(Host:Port, StreamPair),
  544          Error,
  545          ( close(StreamPair, [force(true)]),
  546            throw(Error)
  547          )).
  548
  549%!  hooked_options(+Parts, -Options) is nondet.
  550%
  551%   Calls  http:open_options/2  and  if  necessary    upgrades  old  SSL
  552%   cacerts_file(File) option to a cacerts(List) option to ensure proper
  553%   merging of options.
  554
  555hooked_options(Parts, Options) :-
  556    http:open_options(Parts, Options0),
  557    upgrade_ssl_options(Parts, Options0, Options).
  558
  559:- if(current_predicate(ssl_upgrade_legacy_options/2)).  560upgrade_ssl_options(Parts, Options0, Options) :-
  561    requires_ssl(Parts),
  562    !,
  563    ssl_upgrade_legacy_options(Options0, Options).
  564:- endif.  565upgrade_ssl_options(_, Options, Options).
  566
  567merge_options_rev(Old, New, Merged) :-
  568    merge_options(New, Old, Merged).
  569
  570is_meta(pem_password_hook).             % SSL plugin callbacks
  571is_meta(cert_verify_hook).
  572
  573
  574http:http_protocol_hook(http, _, StreamPair, StreamPair, _).
  575
  576default_port(https, 443) :- !.
  577default_port(wss,   443) :- !.
  578default_port(_,     80).
  579
  580host_and_port(Host, DefPort, DefPort, Host) :- !.
  581host_and_port(Host, _,       Port,    Host:Port).
  582
  583%!  autoload_https(+Parts) is det.
  584%
  585%   If the requested scheme is https or wss, load the HTTPS plugin.
  586
  587autoload_https(Parts) :-
  588    requires_ssl(Parts),
  589    memberchk(scheme(S), Parts),
  590    \+ clause(http:http_protocol_hook(S, _, StreamPair, StreamPair, _),_),
  591    exists_source(library(http/http_ssl_plugin)),
  592    !,
  593    use_module(library(http/http_ssl_plugin)).
  594autoload_https(_).
  595
  596requires_ssl(Parts) :-
  597    memberchk(scheme(S), Parts),
  598    secure_scheme(S).
  599
  600secure_scheme(https).
  601secure_scheme(wss).
  602
  603%!  send_rec_header(+StreamPair, -Stream,
  604%!                  +Host, +RequestURI, +Parts, +Options) is det.
  605%
  606%   Send header to Out and process reply.  If there is an error or
  607%   failure, close In and Out and return the error or failure.
  608
  609send_rec_header(StreamPair, Stream, Host, RequestURI, Parts, Options) :-
  610    (   catch(guarded_send_rec_header(StreamPair, Stream,
  611                                      Host, RequestURI, Parts, Options),
  612              E, true)
  613    ->  (   var(E)
  614        ->  (   option(output(StreamPair), Options)
  615            ->  true
  616            ;   true
  617            )
  618        ;   close(StreamPair, [force(true)]),
  619            throw(E)
  620        )
  621    ;   close(StreamPair, [force(true)]),
  622        fail
  623    ).
  624
  625guarded_send_rec_header(StreamPair, Stream, Host, RequestURI, Parts, Options) :-
  626    user_agent(Agent, Options),
  627    method(Options, MNAME),
  628    http_version(Version),
  629    option(connection(Connection), Options, close),
  630    debug(http(send_request), "> ~w ~w HTTP/~w", [MNAME, RequestURI, Version]),
  631    debug(http(send_request), "> Host: ~w", [Host]),
  632    debug(http(send_request), "> User-Agent: ~w", [Agent]),
  633    debug(http(send_request), "> Connection: ~w", [Connection]),
  634    format(StreamPair,
  635           '~w ~w HTTP/~w\r\n\c
  636               Host: ~w\r\n\c
  637               User-Agent: ~w\r\n\c
  638               Connection: ~w\r\n',
  639           [MNAME, RequestURI, Version, Host, Agent, Connection]),
  640    parts_uri(Parts, URI),
  641    x_headers(Options, URI, StreamPair),
  642    write_cookies(StreamPair, Parts, Options),
  643    (   option(post(PostData), Options)
  644    ->  http_post_data(PostData, StreamPair, [])
  645    ;   format(StreamPair, '\r\n', [])
  646    ),
  647    flush_output(StreamPair),
  648                                    % read the reply header
  649    read_header(StreamPair, Parts, ReplyVersion, Code, Comment, Lines),
  650    update_cookies(Lines, Parts, Options),
  651    reply_header(Lines, Options),
  652    do_open(ReplyVersion, Code, Comment, Lines, Options, Parts, Host,
  653            StreamPair, Stream).
  654
  655
  656%!  http_version(-Version:atom) is det.
  657%
  658%   HTTP version we publish. We  can  only   use  1.1  if we support
  659%   chunked encoding.
  660
  661http_version('1.1') :-
  662    http:current_transfer_encoding(chunked),
  663    !.
  664http_version('1.1') :-
  665    autoload_encoding(chunked),
  666    !.
  667http_version('1.0').
  668
  669method(Options, MNAME) :-
  670    option(post(_), Options),
  671    !,
  672    option(method(M), Options, post),
  673    (   map_method(M, MNAME0)
  674    ->  MNAME = MNAME0
  675    ;   domain_error(method, M)
  676    ).
  677method(Options, MNAME) :-
  678    option(method(M), Options, get),
  679    (   map_method(M, MNAME0)
  680    ->  MNAME = MNAME0
  681    ;   map_method(_, M)
  682    ->  MNAME = M
  683    ;   domain_error(method, M)
  684    ).
  685
  686%!  map_method(+MethodID, -Method)
  687%
  688%   Support additional ``METHOD`` keywords.  Default   are  the official
  689%   HTTP methods as defined by the various RFCs.
  690
  691:- multifile
  692    map_method/2.  693
  694map_method(delete,  'DELETE').
  695map_method(get,     'GET').
  696map_method(head,    'HEAD').
  697map_method(post,    'POST').
  698map_method(put,     'PUT').
  699map_method(patch,   'PATCH').
  700map_method(options, 'OPTIONS').
  701
  702%!  x_headers(+Options, +URI, +Out) is det.
  703%
  704%   Emit extra headers from   request_header(Name=Value)  options in
  705%   Options.
  706%
  707%   @tbd Use user/password fields
  708
  709x_headers(Options, URI, Out) :-
  710    x_headers_(Options, [url(URI)|Options], Out).
  711
  712x_headers_([], _, _).
  713x_headers_([H|T], Options, Out) :-
  714    x_header(H, Options, Out),
  715    x_headers_(T, Options, Out).
  716
  717x_header(request_header(Name=Value), _, Out) :-
  718    !,
  719    debug(http(send_request), "> ~w: ~w", [Name, Value]),
  720    format(Out, '~w: ~w\r\n', [Name, Value]).
  721x_header(proxy_authorization(ProxyAuthorization), Options, Out) :-
  722    !,
  723    auth_header(ProxyAuthorization, Options, 'Proxy-Authorization', Out).
  724x_header(authorization(Authorization), Options, Out) :-
  725    !,
  726    auth_header(Authorization, Options, 'Authorization', Out).
  727x_header(range(Spec), _, Out) :-
  728    !,
  729    Spec =.. [Unit, From, To],
  730    (   To == end
  731    ->  ToT = ''
  732    ;   must_be(integer, To),
  733        ToT = To
  734    ),
  735    debug(http(send_request), "> Range: ~w=~d-~w", [Unit, From, ToT]),
  736    format(Out, 'Range: ~w=~d-~w\r\n', [Unit, From, ToT]).
  737x_header(_, _, _).
  738
  739%!  auth_header(+AuthOption, +Options, +HeaderName, +Out)
  740
  741auth_header(basic(User, Password), _, Header, Out) :-
  742    !,
  743    format(codes(Codes), '~w:~w', [User, Password]),
  744    phrase(base64(Codes), Base64Codes),
  745    debug(http(send_request), "> ~w: Basic ~s", [Header, Base64Codes]),
  746    format(Out, '~w: Basic ~s\r\n', [Header, Base64Codes]).
  747auth_header(bearer(Token), _, Header, Out) :-
  748    !,
  749    debug(http(send_request), "> ~w: Bearer ~w", [Header,Token]),
  750    format(Out, '~w: Bearer ~w\r\n', [Header, Token]).
  751auth_header(Auth, Options, _, Out) :-
  752    option(url(URL), Options),
  753    add_method(Options, Options1),
  754    http:authenticate_client(URL, send_auth_header(Auth, Out, Options1)),
  755    !.
  756auth_header(Auth, _, _, _) :-
  757    domain_error(authorization, Auth).
  758
  759user_agent(Agent, Options) :-
  760    (   option(user_agent(Agent), Options)
  761    ->  true
  762    ;   user_agent(Agent)
  763    ).
  764
  765add_method(Options0, Options) :-
  766    option(method(_), Options0),
  767    !,
  768    Options = Options0.
  769add_method(Options0, Options) :-
  770    option(post(_), Options0),
  771    !,
  772    Options = [method(post)|Options0].
  773add_method(Options0, [method(get)|Options0]).
  774
  775%!  do_open(+HTTPVersion, +HTTPStatusCode, +HTTPStatusComment, +Header,
  776%!          +Options, +Parts, +Host, +In, -FinalIn) is det.
  777%
  778%   Handle the HTTP status once available. If   200-299, we are ok. If a
  779%   redirect, redo the open,  returning  a   new  stream.  Else issue an
  780%   error.
  781%
  782%   @error  existence_error(url, URL)
  783
  784                                        % Redirections
  785do_open(_, Code, _, Lines, Options0, Parts, _, In, Stream) :-
  786    redirect_code(Code),
  787    option(redirect(true), Options0, true),
  788    location(Lines, RequestURI),
  789    !,
  790    debug(http(redirect), 'http_open: redirecting to ~w', [RequestURI]),
  791    close(In),
  792    parts_uri(Parts, Base),
  793    uri_resolve(RequestURI, Base, Redirected),
  794    parse_url_ex(Redirected, RedirectedParts),
  795    (   redirect_limit_exceeded(Options0, Max)
  796    ->  format(atom(Comment), 'max_redirect (~w) limit exceeded', [Max]),
  797        throw(error(permission_error(redirect, http, Redirected),
  798                    context(_, Comment)))
  799    ;   redirect_loop(RedirectedParts, Options0)
  800    ->  throw(error(permission_error(redirect, http, Redirected),
  801                    context(_, 'Redirection loop')))
  802    ;   true
  803    ),
  804    redirect_options(Parts, RedirectedParts, Options0, Options),
  805    http_open(RedirectedParts, Stream, Options).
  806                                        % Need authentication
  807do_open(_Version, Code, _Comment, Lines, Options0, Parts, _Host, In0, Stream) :-
  808    authenticate_code(Code),
  809    option(authenticate(true), Options0, true),
  810    parts_uri(Parts, URI),
  811    parse_headers(Lines, Headers),
  812    http:authenticate_client(
  813             URI,
  814             auth_reponse(Headers, Options0, Options)),
  815    !,
  816    close(In0),
  817    http_open(Parts, Stream, Options).
  818                                        % Accepted codes
  819do_open(Version, Code, _, Lines, Options, Parts, Host, In0, In) :-
  820    (   option(status_code(Code), Options),
  821        Lines \== []
  822    ->  true
  823    ;   successful_code(Code)
  824    ),
  825    !,
  826    parts_uri(Parts, URI),
  827    parse_headers(Lines, Headers),
  828    return_version(Options, Version),
  829    return_size(Options, Headers),
  830    return_fields(Options, Headers),
  831    return_headers(Options, [status_code(Code)|Headers]),
  832    consider_keep_alive(Lines, Parts, Host, In0, In1, Options),
  833    transfer_encoding_filter(Lines, In1, In, Options),
  834                                    % properly re-initialise the stream
  835    set_stream(In, file_name(URI)),
  836    set_stream(In, record_position(true)).
  837do_open(_, _, _, [], Options, _, _, _, _) :-
  838    option(connection(Connection), Options),
  839    keep_alive(Connection),
  840    !,
  841    throw(error(keep_alive(closed),_)).
  842                                        % report anything else as error
  843do_open(_Version, Code, Comment, _,  _, Parts, _, _, _) :-
  844    parts_uri(Parts, URI),
  845    (   map_error_code(Code, Error)
  846    ->  Formal =.. [Error, url, URI]
  847    ;   Formal = existence_error(url, URI)
  848    ),
  849    throw(error(Formal, context(_, status(Code, Comment)))).
  850
  851
  852successful_code(Code) :-
  853    between(200, 299, Code).
  854
  855%!  redirect_limit_exceeded(+Options:list(compound), -Max:nonneg) is semidet.
  856%
  857%   True if we have exceeded the maximum redirection length (default 10).
  858
  859redirect_limit_exceeded(Options, Max) :-
  860    option(visited(Visited), Options, []),
  861    length(Visited, N),
  862    option(max_redirect(Max), Options, 10),
  863    (Max == infinite -> fail ; N > Max).
  864
  865
  866%!  redirect_loop(+Parts, +Options) is semidet.
  867%
  868%   True if we are in  a  redirection   loop.  Note  that some sites
  869%   redirect once to the same place using  cookies or similar, so we
  870%   allow for two tries. In fact,   we  should probably test whether
  871%   authorization or cookie headers have changed.
  872
  873redirect_loop(Parts, Options) :-
  874    option(visited(Visited), Options, []),
  875    include(==(Parts), Visited, Same),
  876    length(Same, Count),
  877    Count > 2.
  878
  879
  880%!  redirect_options(+Parts, +RedirectedParts, +Options0, -Options) is det.
  881%
  882%   A redirect from a POST should do  a   GET  on the returned URI. This
  883%   means we must remove the method(post)   and  post(Data) options from
  884%   the original option-list.
  885%
  886%   If we are connecting over a Unix   domain socket we drop this option
  887%   if the redirect host does not match the initial host.
  888
  889redirect_options(Parts, RedirectedParts, Options0, Options) :-
  890    select_option(unix_socket(_), Options0, Options1),
  891    memberchk(host(Host), Parts),
  892    memberchk(host(RHost), RedirectedParts),
  893    debug(http(redirect), 'http_open: redirecting AF_UNIX ~w to ~w',
  894          [Host, RHost]),
  895    Host \== RHost,
  896    !,
  897    redirect_options(Options1, Options).
  898redirect_options(_, _, Options0, Options) :-
  899    redirect_options(Options0, Options).
  900
  901redirect_options(Options0, Options) :-
  902    (   select_option(post(_), Options0, Options1)
  903    ->  true
  904    ;   Options1 = Options0
  905    ),
  906    (   select_option(method(Method), Options1, Options),
  907        \+ redirect_method(Method)
  908    ->  true
  909    ;   Options = Options1
  910    ).
  911
  912redirect_method(delete).
  913redirect_method(get).
  914redirect_method(head).
  915
  916
  917%!  map_error_code(+HTTPCode, -PrologError) is semidet.
  918%
  919%   Map HTTP error codes to Prolog errors.
  920%
  921%   @tbd    Many more maps. Unfortunately many have no sensible Prolog
  922%           counterpart.
  923
  924map_error_code(401, permission_error).
  925map_error_code(403, permission_error).
  926map_error_code(404, existence_error).
  927map_error_code(405, permission_error).
  928map_error_code(407, permission_error).
  929map_error_code(410, existence_error).
  930
  931redirect_code(301).                     % Moved Permanently
  932redirect_code(302).                     % Found (previously "Moved Temporary")
  933redirect_code(303).                     % See Other
  934redirect_code(307).                     % Temporary Redirect
  935
  936authenticate_code(401).
  937
  938%!  open_socket(+Address, -StreamPair, +Options) is det.
  939%
  940%   Create and connect a client socket to Address.  Options
  941%
  942%       * timeout(+Timeout)
  943%       Sets timeout on the stream, *after* connecting the
  944%       socket.
  945%
  946%   @tbd    Make timeout also work on tcp_connect/4.
  947%   @tbd    This is the same as do_connect/4 in http_client.pl
  948
  949open_socket(Address, StreamPair, Options) :-
  950    debug(http(open), 'http_open: Connecting to ~p ...', [Address]),
  951    tcp_connect(Address, StreamPair, Options),
  952    stream_pair(StreamPair, In, Out),
  953    debug(http(open), '\tok ~p ---> ~p', [In, Out]),
  954    set_stream(In, record_position(false)),
  955    (   option(timeout(Timeout), Options)
  956    ->  set_stream(In, timeout(Timeout))
  957    ;   true
  958    ).
  959
  960
  961return_version(Options, Major-Minor) :-
  962    option(version(Major-Minor), Options, _).
  963
  964return_size(Options, Headers) :-
  965    (   memberchk(content_length(Size), Headers)
  966    ->  option(size(Size), Options, _)
  967    ;   true
  968    ).
  969
  970return_fields([], _).
  971return_fields([header(Name, Value)|T], Headers) :-
  972    !,
  973    (   Term =.. [Name,Value],
  974        memberchk(Term, Headers)
  975    ->  true
  976    ;   Value = ''
  977    ),
  978    return_fields(T, Headers).
  979return_fields([_|T], Lines) :-
  980    return_fields(T, Lines).
  981
  982return_headers(Options, Headers) :-
  983    option(headers(Headers), Options, _).
  984
  985%!  parse_headers(+Lines, -Headers:list(compound)) is det.
  986%
  987%   Parse the header lines for   the  headers(-List) option. Invalid
  988%   header   lines   are   skipped,   printing   a   warning   using
  989%   pring_message/2.
  990
  991parse_headers([], []) :- !.
  992parse_headers([Line|Lines], Headers) :-
  993    catch(http_parse_header(Line, [Header]), Error, true),
  994    (   var(Error)
  995    ->  Headers = [Header|More]
  996    ;   print_message(warning, Error),
  997        Headers = More
  998    ),
  999    parse_headers(Lines, More).
 1000
 1001
 1002%!  return_final_url(+Options) is semidet.
 1003%
 1004%   If Options contains final_url(URL), unify URL with the final
 1005%   URL after redirections.
 1006
 1007return_final_url(Options) :-
 1008    option(final_url(URL), Options),
 1009    var(URL),
 1010    !,
 1011    option(visited([Parts|_]), Options),
 1012    parts_uri(Parts, URL).
 1013return_final_url(_).
 1014
 1015
 1016%!  transfer_encoding_filter(+Lines, +In0, -In, +Options) is det.
 1017%
 1018%   Install filters depending on the transfer  encoding. If In0 is a
 1019%   stream-pair, we close the output   side. If transfer-encoding is
 1020%   not specified, the content-encoding is  interpreted as a synonym
 1021%   for transfer-encoding, because many   servers incorrectly depend
 1022%   on  this.  Exceptions  to  this   are  content-types  for  which
 1023%   disable_encoding_filter/1 holds.
 1024
 1025transfer_encoding_filter(Lines, In0, In, Options) :-
 1026    transfer_encoding(Lines, Encoding),
 1027    !,
 1028    transfer_encoding_filter_(Encoding, In0, In, Options).
 1029transfer_encoding_filter(Lines, In0, In, Options) :-
 1030    content_encoding(Lines, Encoding),
 1031    content_type(Lines, Type),
 1032    \+ http:disable_encoding_filter(Type),
 1033    !,
 1034    transfer_encoding_filter_(Encoding, In0, In, Options).
 1035transfer_encoding_filter(_, In, In, _Options).
 1036
 1037transfer_encoding_filter_(Encoding, In0, In, Options) :-
 1038    option(raw_encoding(Encoding), Options),
 1039    !,
 1040    In = In0.
 1041transfer_encoding_filter_(Encoding, In0, In, _Options) :-
 1042    stream_pair(In0, In1, Out),
 1043    (   nonvar(Out)
 1044    ->  close(Out)
 1045    ;   true
 1046    ),
 1047    (   http:encoding_filter(Encoding, In1, In)
 1048    ->  true
 1049    ;   autoload_encoding(Encoding),
 1050        http:encoding_filter(Encoding, In1, In)
 1051    ->  true
 1052    ;   domain_error(http_encoding, Encoding)
 1053    ).
 1054
 1055:- multifile
 1056    autoload_encoding/1. 1057
 1058:- if(exists_source(library(zlib))). 1059autoload_encoding(gzip) :-
 1060    use_module(library(zlib)).
 1061:- endif. 1062:- if(exists_source(library(http/http_stream))). 1063autoload_encoding(chunked) :-
 1064    use_module(library(http/http_stream)).
 1065:- endif. 1066
 1067content_type(Lines, Type) :-
 1068    member(Line, Lines),
 1069    phrase(field('content-type'), Line, Rest),
 1070    !,
 1071    atom_codes(Type, Rest).
 1072
 1073%!  http:disable_encoding_filter(+ContentType) is semidet.
 1074%
 1075%   Do not use  the   =|Content-encoding|=  as =|Transfer-encoding|=
 1076%   encoding for specific values of   ContentType. This predicate is
 1077%   multifile and can thus be extended by the user.
 1078
 1079http:disable_encoding_filter('application/x-gzip').
 1080http:disable_encoding_filter('application/x-tar').
 1081http:disable_encoding_filter('x-world/x-vrml').
 1082http:disable_encoding_filter('application/zip').
 1083http:disable_encoding_filter('application/x-gzip').
 1084http:disable_encoding_filter('application/x-zip-compressed').
 1085http:disable_encoding_filter('application/x-compress').
 1086http:disable_encoding_filter('application/x-compressed').
 1087http:disable_encoding_filter('application/x-spoon').
 1088
 1089%!  transfer_encoding(+Lines, -Encoding) is semidet.
 1090%
 1091%   True if Encoding  is  the   value  of  the =|Transfer-encoding|=
 1092%   header.
 1093
 1094transfer_encoding(Lines, Encoding) :-
 1095    what_encoding(transfer_encoding, Lines, Encoding).
 1096
 1097what_encoding(What, Lines, Encoding) :-
 1098    member(Line, Lines),
 1099    phrase(encoding_(What, Debug), Line, Rest),
 1100    !,
 1101    atom_codes(Encoding, Rest),
 1102    debug(http(What), '~w: ~p', [Debug, Rest]).
 1103
 1104encoding_(content_encoding, 'Content-encoding') -->
 1105    field('content-encoding').
 1106encoding_(transfer_encoding, 'Transfer-encoding') -->
 1107    field('transfer-encoding').
 1108
 1109%!  content_encoding(+Lines, -Encoding) is semidet.
 1110%
 1111%   True if Encoding is the value of the =|Content-encoding|=
 1112%   header.
 1113
 1114content_encoding(Lines, Encoding) :-
 1115    what_encoding(content_encoding, Lines, Encoding).
 1116
 1117%!  read_header(+In:stream, +Parts, -Version, -Code:int,
 1118%!  -Comment:atom, -Lines:list) is det.
 1119%
 1120%   Read the HTTP reply-header.  If the reply is completely empty
 1121%   an existence error is thrown.  If the replied header is
 1122%   otherwise invalid a 500 HTTP error is simulated, having the
 1123%   comment =|Invalid reply header|=.
 1124%
 1125%   @param Parts    A list of compound terms that describe the
 1126%                   parsed request URI.
 1127%   @param Version  HTTP reply version as Major-Minor pair
 1128%   @param Code     Numeric HTTP reply-code
 1129%   @param Comment  Comment of reply-code as atom
 1130%   @param Lines    Remaining header lines as code-lists.
 1131%
 1132%   @error existence_error(http_reply, Uri)
 1133
 1134read_header(In, Parts, Major-Minor, Code, Comment, Lines) :-
 1135    read_line_to_codes(In, Line),
 1136    (   Line == end_of_file
 1137    ->  parts_uri(Parts, Uri),
 1138        existence_error(http_reply,Uri)
 1139    ;   true
 1140    ),
 1141    Line \== end_of_file,
 1142    phrase(first_line(Major-Minor, Code, Comment), Line),
 1143    debug(http(open), 'HTTP/~d.~d ~w ~w', [Major, Minor, Code, Comment]),
 1144    read_line_to_codes(In, Line2),
 1145    rest_header(Line2, In, Lines),
 1146    !,
 1147    (   debugging(http(open))
 1148    ->  forall(member(HL, Lines),
 1149               debug(http(open), '~s', [HL]))
 1150    ;   true
 1151    ).
 1152read_header(_, _, 1-1, 500, 'Invalid reply header', []).
 1153
 1154rest_header([], _, []) :- !.            % blank line: end of header
 1155rest_header(L0, In, [L0|L]) :-
 1156    read_line_to_codes(In, L1),
 1157    rest_header(L1, In, L).
 1158
 1159%!  content_length(+Header, -Length:int) is semidet.
 1160%
 1161%   Find the Content-Length in an HTTP reply-header.
 1162
 1163content_length(Lines, Length) :-
 1164    member(Line, Lines),
 1165    phrase(content_length(Length0), Line),
 1166    !,
 1167    Length = Length0.
 1168
 1169location(Lines, RequestURI) :-
 1170    member(Line, Lines),
 1171    phrase(atom_field(location, RequestURI), Line),
 1172    !.
 1173
 1174connection(Lines, Connection) :-
 1175    member(Line, Lines),
 1176    phrase(atom_field(connection, Connection0), Line),
 1177    !,
 1178    Connection = Connection0.
 1179
 1180first_line(Major-Minor, Code, Comment) -->
 1181    "HTTP/", integer(Major), ".", integer(Minor),
 1182    skip_blanks,
 1183    integer(Code),
 1184    skip_blanks,
 1185    rest(Comment).
 1186
 1187atom_field(Name, Value) -->
 1188    field(Name),
 1189    rest(Value).
 1190
 1191content_length(Len) -->
 1192    field('content-length'),
 1193    integer(Len).
 1194
 1195field(Name) -->
 1196    { atom_codes(Name, Codes) },
 1197    field_codes(Codes).
 1198
 1199field_codes([]) -->
 1200    ":",
 1201    skip_blanks.
 1202field_codes([H|T]) -->
 1203    [C],
 1204    { match_header_char(H, C)
 1205    },
 1206    field_codes(T).
 1207
 1208match_header_char(C, C) :- !.
 1209match_header_char(C, U) :-
 1210    code_type(C, to_lower(U)),
 1211    !.
 1212match_header_char(0'_, 0'-).
 1213
 1214
 1215skip_blanks -->
 1216    [C],
 1217    { code_type(C, white)
 1218    },
 1219    !,
 1220    skip_blanks.
 1221skip_blanks -->
 1222    [].
 1223
 1224%!  integer(-Int)//
 1225%
 1226%   Read 1 or more digits and return as integer.
 1227
 1228integer(Code) -->
 1229    digit(D0),
 1230    digits(D),
 1231    { number_codes(Code, [D0|D])
 1232    }.
 1233
 1234digit(C) -->
 1235    [C],
 1236    { code_type(C, digit)
 1237    }.
 1238
 1239digits([D0|D]) -->
 1240    digit(D0),
 1241    !,
 1242    digits(D).
 1243digits([]) -->
 1244    [].
 1245
 1246%!  rest(-Atom:atom)//
 1247%
 1248%   Get rest of input as an atom.
 1249
 1250rest(Atom) --> call(rest_(Atom)).
 1251
 1252rest_(Atom, L, []) :-
 1253    atom_codes(Atom, L).
 1254
 1255
 1256%!  reply_header(+Lines, +Options) is det.
 1257%
 1258%   Return the entire reply header as  a list of strings to the option
 1259%   raw_headers(-Headers).
 1260
 1261reply_header(Lines, Options) :-
 1262    option(raw_headers(Headers), Options),
 1263    !,
 1264    maplist(string_codes, Headers, Lines).
 1265reply_header(_, _).
 1266
 1267
 1268                 /*******************************
 1269                 *   AUTHORIZATION MANAGEMENT   *
 1270                 *******************************/
 1271
 1272%!  http_set_authorization(+URL, +Authorization) is det.
 1273%
 1274%   Set user/password to supply with URLs   that have URL as prefix.
 1275%   If  Authorization  is  the   atom    =|-|=,   possibly   defined
 1276%   authorization is cleared.  For example:
 1277%
 1278%   ```
 1279%   ?- http_set_authorization('http://www.example.com/private/',
 1280%                             basic('John', 'Secret'))
 1281%   ```
 1282%
 1283%   @tbd    Move to a separate module, so http_get/3, etc. can use this
 1284%           too.
 1285
 1286:- dynamic
 1287    stored_authorization/2,
 1288    cached_authorization/2. 1289
 1290http_set_authorization(URL, Authorization) :-
 1291    must_be(atom, URL),
 1292    retractall(stored_authorization(URL, _)),
 1293    (   Authorization = (-)
 1294    ->  true
 1295    ;   check_authorization(Authorization),
 1296        assert(stored_authorization(URL, Authorization))
 1297    ),
 1298    retractall(cached_authorization(_,_)).
 1299
 1300check_authorization(Var) :-
 1301    var(Var),
 1302    !,
 1303    instantiation_error(Var).
 1304check_authorization(basic(User, Password)) :-
 1305    must_be(atom, User),
 1306    must_be(text, Password).
 1307check_authorization(digest(User, Password)) :-
 1308    must_be(atom, User),
 1309    must_be(text, Password).
 1310
 1311%!  authorization(+URL, -Authorization) is semidet.
 1312%
 1313%   True if Authorization must be supplied for URL.
 1314%
 1315%   @tbd    Cleanup cache if it gets too big.
 1316
 1317authorization(_, _) :-
 1318    \+ stored_authorization(_, _),
 1319    !,
 1320    fail.
 1321authorization(URL, Authorization) :-
 1322    cached_authorization(URL, Authorization),
 1323    !,
 1324    Authorization \== (-).
 1325authorization(URL, Authorization) :-
 1326    (   stored_authorization(Prefix, Authorization),
 1327        sub_atom(URL, 0, _, _, Prefix)
 1328    ->  assert(cached_authorization(URL, Authorization))
 1329    ;   assert(cached_authorization(URL, -)),
 1330        fail
 1331    ).
 1332
 1333add_authorization(_, Options, Options) :-
 1334    option(authorization(_), Options),
 1335    !.
 1336add_authorization(Parts, Options0, Options) :-
 1337    url_part(user(User), Parts),
 1338    url_part(password(Passwd), Parts),
 1339    !,
 1340    Options = [authorization(basic(User,Passwd))|Options0].
 1341add_authorization(Parts, Options0, Options) :-
 1342    stored_authorization(_, _) ->   % quick test to avoid work
 1343    parts_uri(Parts, URL),
 1344    authorization(URL, Auth),
 1345    !,
 1346    Options = [authorization(Auth)|Options0].
 1347add_authorization(_, Options, Options).
 1348
 1349
 1350%!  parse_url_ex(+URL, -Parts)
 1351%
 1352%   Parts:  Scheme,  Host,  Port,    User:Password,  RequestURI  (no
 1353%   fragment).
 1354
 1355parse_url_ex(URL, [uri(URL)|Parts]) :-
 1356    uri_components(URL, Components),
 1357    phrase(components(Components), Parts),
 1358    (   option(host(_), Parts)
 1359    ->  true
 1360    ;   domain_error(url, URL)
 1361    ).
 1362
 1363components(Components) -->
 1364    uri_scheme(Components),
 1365    uri_path(Components),
 1366    uri_authority(Components),
 1367    uri_request_uri(Components).
 1368
 1369uri_scheme(Components) -->
 1370    { uri_data(scheme, Components, Scheme), nonvar(Scheme) },
 1371    !,
 1372    [ scheme(Scheme)
 1373    ].
 1374uri_scheme(_) --> [].
 1375
 1376uri_path(Components) -->
 1377    { uri_data(path, Components, Path0), nonvar(Path0),
 1378      (   Path0 == ''
 1379      ->  Path = (/)
 1380      ;   Path = Path0
 1381      )
 1382    },
 1383    !,
 1384    [ path(Path)
 1385    ].
 1386uri_path(_) --> [].
 1387
 1388uri_authority(Components) -->
 1389    { uri_data(authority, Components, Auth), nonvar(Auth),
 1390      !,
 1391      uri_authority_components(Auth, Data)
 1392    },
 1393    [ authority(Auth) ],
 1394    auth_field(user, Data),
 1395    auth_field(password, Data),
 1396    auth_field(host, Data),
 1397    auth_field(port, Data).
 1398uri_authority(_) --> [].
 1399
 1400auth_field(Field, Data) -->
 1401    { uri_authority_data(Field, Data, EncValue), nonvar(EncValue),
 1402      !,
 1403      (   atom(EncValue)
 1404      ->  uri_encoded(query_value, Value, EncValue)
 1405      ;   Value = EncValue
 1406      ),
 1407      Part =.. [Field,Value]
 1408    },
 1409    [ Part ].
 1410auth_field(_, _) --> [].
 1411
 1412uri_request_uri(Components) -->
 1413    { uri_data(path, Components, Path0),
 1414      uri_data(search, Components, Search),
 1415      (   Path0 == ''
 1416      ->  Path = (/)
 1417      ;   Path = Path0
 1418      ),
 1419      uri_data(path, Components2, Path),
 1420      uri_data(search, Components2, Search),
 1421      uri_components(RequestURI, Components2)
 1422    },
 1423    [ request_uri(RequestURI)
 1424    ].
 1425
 1426%!  parts_scheme(+Parts, -Scheme) is det.
 1427%!  parts_uri(+Parts, -URI) is det.
 1428%!  parts_request_uri(+Parts, -RequestURI) is det.
 1429%!  parts_search(+Parts, -Search) is det.
 1430%!  parts_authority(+Parts, -Authority) is semidet.
 1431
 1432parts_scheme(Parts, Scheme) :-
 1433    url_part(scheme(Scheme), Parts),
 1434    !.
 1435parts_scheme(Parts, Scheme) :-          % compatibility with library(url)
 1436    url_part(protocol(Scheme), Parts),
 1437    !.
 1438parts_scheme(_, http).
 1439
 1440parts_authority(Parts, Auth) :-
 1441    url_part(authority(Auth), Parts),
 1442    !.
 1443parts_authority(Parts, Auth) :-
 1444    url_part(host(Host), Parts, _),
 1445    url_part(port(Port), Parts, _),
 1446    url_part(user(User), Parts, _),
 1447    url_part(password(Password), Parts, _),
 1448    uri_authority_components(Auth,
 1449                             uri_authority(User, Password, Host, Port)).
 1450
 1451parts_request_uri(Parts, RequestURI) :-
 1452    option(request_uri(RequestURI), Parts),
 1453    !.
 1454parts_request_uri(Parts, RequestURI) :-
 1455    url_part(path(Path), Parts, /),
 1456    ignore(parts_search(Parts, Search)),
 1457    uri_data(path, Data, Path),
 1458    uri_data(search, Data, Search),
 1459    uri_components(RequestURI, Data).
 1460
 1461parts_search(Parts, Search) :-
 1462    option(query_string(Search), Parts),
 1463    !.
 1464parts_search(Parts, Search) :-
 1465    option(search(Fields), Parts),
 1466    !,
 1467    uri_query_components(Search, Fields).
 1468
 1469
 1470parts_uri(Parts, URI) :-
 1471    option(uri(URI), Parts),
 1472    !.
 1473parts_uri(Parts, URI) :-
 1474    parts_scheme(Parts, Scheme),
 1475    ignore(parts_authority(Parts, Auth)),
 1476    parts_request_uri(Parts, RequestURI),
 1477    uri_components(RequestURI, Data),
 1478    uri_data(scheme, Data, Scheme),
 1479    uri_data(authority, Data, Auth),
 1480    uri_components(URI, Data).
 1481
 1482parts_port(Parts, Port) :-
 1483    parts_scheme(Parts, Scheme),
 1484    default_port(Scheme, DefPort),
 1485    url_part(port(Port), Parts, DefPort).
 1486
 1487url_part(Part, Parts) :-
 1488    Part =.. [Name,Value],
 1489    Gen =.. [Name,RawValue],
 1490    option(Gen, Parts),
 1491    !,
 1492    Value = RawValue.
 1493
 1494url_part(Part, Parts, Default) :-
 1495    Part =.. [Name,Value],
 1496    Gen =.. [Name,RawValue],
 1497    (   option(Gen, Parts)
 1498    ->  Value = RawValue
 1499    ;   Value = Default
 1500    ).
 1501
 1502
 1503                 /*******************************
 1504                 *            COOKIES           *
 1505                 *******************************/
 1506
 1507write_cookies(Out, Parts, Options) :-
 1508    http:write_cookies(Out, Parts, Options),
 1509    !.
 1510write_cookies(_, _, _).
 1511
 1512update_cookies(_, _, _) :-
 1513    predicate_property(http:update_cookies(_,_,_), number_of_clauses(0)),
 1514    !.
 1515update_cookies(Lines, Parts, Options) :-
 1516    (   member(Line, Lines),
 1517        phrase(atom_field('set_cookie', CookieData), Line),
 1518        http:update_cookies(CookieData, Parts, Options),
 1519        fail
 1520    ;   true
 1521    ).
 1522
 1523
 1524                 /*******************************
 1525                 *           OPEN ANY           *
 1526                 *******************************/
 1527
 1528:- multifile iostream:open_hook/6. 1529
 1530%!  iostream:open_hook(+Spec, +Mode, -Stream, -Close,
 1531%!                     +Options0, -Options) is semidet.
 1532%
 1533%   Hook implementation that makes  open_any/5   support  =http= and
 1534%   =https= URLs for =|Mode == read|=.
 1535
 1536iostream:open_hook(URL, read, Stream, Close, Options0, Options) :-
 1537    (atom(URL) -> true ; string(URL)),
 1538    uri_is_global(URL),
 1539    uri_components(URL, Components),
 1540    uri_data(scheme, Components, Scheme),
 1541    http_scheme(Scheme),
 1542    !,
 1543    Options = Options0,
 1544    Close = close(Stream),
 1545    http_open(URL, Stream, Options0).
 1546
 1547http_scheme(http).
 1548http_scheme(https).
 1549
 1550
 1551                 /*******************************
 1552                 *          KEEP-ALIVE          *
 1553                 *******************************/
 1554
 1555%!  consider_keep_alive(+HeaderLines, +Parts, +Host,
 1556%!                      +Stream0, -Stream,
 1557%!                      +Options) is det.
 1558%
 1559%   If we have agree on a Keep-alive   connection, return a range stream
 1560%   rather than the original stream. We also  use the content length and
 1561%   a range stream if we are dealing   with an HTTPS connection. This is
 1562%   because not all servers seem to  complete the TLS closing handshake.
 1563%   If the server does not complete  this   we  receive  a TLS handshake
 1564%   error on end-of-file, causing the read to fail.
 1565
 1566consider_keep_alive(Lines, Parts, Host, StreamPair, In, Options) :-
 1567    option(connection(Asked), Options),
 1568    keep_alive(Asked),
 1569    connection(Lines, Given),
 1570    keep_alive(Given),
 1571    content_length(Lines, Bytes),
 1572    !,
 1573    stream_pair(StreamPair, In0, _),
 1574    connection_address(Host, Parts, HostPort),
 1575    debug(http(connection),
 1576          'Keep-alive to ~w (~D bytes)', [HostPort, Bytes]),
 1577    stream_range_open(In0, In,
 1578                      [ size(Bytes),
 1579                        onclose(keep_alive(StreamPair, HostPort))
 1580                      ]).
 1581consider_keep_alive(Lines, Parts, _Host, StreamPair, In, _Options) :-
 1582    memberchk(scheme(https), Parts),
 1583    content_length(Lines, Bytes),
 1584    !,
 1585    stream_pair(StreamPair, In0, _),
 1586    stream_range_open(In0, In,
 1587                      [ size(Bytes),
 1588                        onclose(close_range(StreamPair))
 1589                      ]).
 1590consider_keep_alive(_, _, _, Stream, Stream, _).
 1591
 1592connection_address(Host, _, Host) :-
 1593    Host = _:_,
 1594    !.
 1595connection_address(Host, Parts, Host:Port) :-
 1596    parts_port(Parts, Port).
 1597
 1598keep_alive(keep_alive) :- !.
 1599keep_alive(Connection) :-
 1600    downcase_atom(Connection, 'keep-alive').
 1601
 1602:- public keep_alive/4. 1603
 1604keep_alive(StreamPair, Host, _In, 0) :-
 1605    !,
 1606    debug(http(connection), 'Adding connection to ~p to pool', [Host]),
 1607    add_to_pool(Host, StreamPair).
 1608keep_alive(StreamPair, Host, In, Left) :-
 1609    Left < 100,
 1610    debug(http(connection), 'Reading ~D left bytes', [Left]),
 1611    read_incomplete(In, Left),
 1612    add_to_pool(Host, StreamPair),
 1613    !.
 1614keep_alive(StreamPair, _, _, _) :-
 1615    debug(http(connection),
 1616          'Closing connection due to excessive unprocessed input', []),
 1617    (   debugging(http(connection))
 1618    ->  catch(close(StreamPair), E,
 1619              print_message(warning, E))
 1620    ;   close(StreamPair, [force(true)])
 1621    ).
 1622
 1623:- public close_range/3. 1624close_range(StreamPair, _Raw, _BytesLeft) :-
 1625    close(StreamPair, [force(true)]).
 1626
 1627%!  read_incomplete(+In, +Left) is semidet.
 1628%
 1629%   If we have not all input from  a Keep-alive connection, read the
 1630%   remainder if it is short. Else, we fail and close the stream.
 1631
 1632read_incomplete(In, Left) :-
 1633    catch(setup_call_cleanup(
 1634              open_null_stream(Null),
 1635              copy_stream_data(In, Null, Left),
 1636              close(Null)),
 1637          _,
 1638          fail).
 1639
 1640:- dynamic
 1641    connection_pool/4,              % Hash, Address, Stream, Time
 1642    connection_gc_time/1. 1643
 1644add_to_pool(Address, StreamPair) :-
 1645    keep_connection(Address),
 1646    get_time(Now),
 1647    term_hash(Address, Hash),
 1648    assertz(connection_pool(Hash, Address, StreamPair, Now)).
 1649
 1650get_from_pool(Address, StreamPair) :-
 1651    term_hash(Address, Hash),
 1652    retract(connection_pool(Hash, Address, StreamPair, _)).
 1653
 1654%!  keep_connection(+Address) is semidet.
 1655%
 1656%   Succeeds if we want to keep   the  connection open. We currently
 1657%   keep a maximum of 10 connections  waiting   and  a  maximum of 2
 1658%   waiting for the same address. Connections   older than 2 seconds
 1659%   are closed.
 1660
 1661keep_connection(Address) :-
 1662    close_old_connections(2),
 1663    predicate_property(connection_pool(_,_,_,_), number_of_clauses(C)),
 1664    C =< 10,
 1665    term_hash(Address, Hash),
 1666    aggregate_all(count, connection_pool(Hash, Address, _, _), Count),
 1667    Count =< 2.
 1668
 1669close_old_connections(Timeout) :-
 1670    get_time(Now),
 1671    Before is Now - Timeout,
 1672    (   connection_gc_time(GC),
 1673        GC > Before
 1674    ->  true
 1675    ;   (   retractall(connection_gc_time(_)),
 1676            asserta(connection_gc_time(Now)),
 1677            connection_pool(Hash, Address, StreamPair, Added),
 1678            Added < Before,
 1679            retract(connection_pool(Hash, Address, StreamPair, Added)),
 1680            debug(http(connection),
 1681                  'Closing inactive keep-alive to ~p', [Address]),
 1682            close(StreamPair, [force(true)]),
 1683            fail
 1684        ;   true
 1685        )
 1686    ).
 1687
 1688
 1689%!  http_close_keep_alive(+Address) is det.
 1690%
 1691%   Close all keep-alive connections matching Address. Address is of
 1692%   the  form  Host:Port.  In  particular,  http_close_keep_alive(_)
 1693%   closes all currently known keep-alive connections.
 1694
 1695http_close_keep_alive(Address) :-
 1696    forall(get_from_pool(Address, StreamPair),
 1697           close(StreamPair, [force(true)])).
 1698
 1699%!  keep_alive_error(+Error, +StreamPair)
 1700%
 1701%   Deal with an error from  reusing   a  keep-alive  connection. If the
 1702%   error is due to an I/O error  or end-of-file, fail to backtrack over
 1703%   get_from_pool/2. Otherwise it is a real   error and we thus re-raise
 1704%   it. In all cases we close StreamPair rather than returning it to the
 1705%   pool as we may have done a partial read and thus be out of sync wrt.
 1706%   the HTTP protocol.
 1707
 1708keep_alive_error(error(keep_alive(closed), _), _) :-
 1709    !,
 1710    debug(http(connection), 'Keep-alive connection was closed', []),
 1711    fail.
 1712keep_alive_error(error(io_error(_,_), _), StreamPair) :-
 1713    !,
 1714    close(StreamPair, [force(true)]),
 1715    debug(http(connection), 'IO error on Keep-alive connection', []),
 1716    fail.
 1717keep_alive_error(error(existence_error(http_reply, _URL), _), _) :-
 1718    !,
 1719    debug(http(connection), 'Got empty reply on Keep-alive connection', []),
 1720    fail.
 1721keep_alive_error(Error, StreamPair) :-
 1722    close(StreamPair, [force(true)]),
 1723    throw(Error).
 1724
 1725
 1726                 /*******************************
 1727                 *     HOOK DOCUMENTATION       *
 1728                 *******************************/
 1729
 1730%!  http:open_options(+Parts, -Options) is nondet.
 1731%
 1732%   This hook is used by the HTTP   client library to define default
 1733%   options based on the the broken-down request-URL.  The following
 1734%   example redirects all trafic, except for localhost over a proxy:
 1735%
 1736%       ```
 1737%       :- multifile
 1738%           http:open_options/2.
 1739%
 1740%       http:open_options(Parts, Options) :-
 1741%           option(host(Host), Parts),
 1742%           Host \== localhost,
 1743%           Options = [proxy('proxy.local', 3128)].
 1744%       ```
 1745%
 1746%   This hook may return multiple   solutions.  The returned options
 1747%   are  combined  using  merge_options/3  where  earlier  solutions
 1748%   overrule later solutions.
 1749
 1750%!  http:write_cookies(+Out, +Parts, +Options) is semidet.
 1751%
 1752%   Emit a =|Cookie:|= header for the  current connection. Out is an
 1753%   open stream to the HTTP server, Parts is the broken-down request
 1754%   (see uri_components/2) and Options is the list of options passed
 1755%   to http_open.  The predicate is called as if using ignore/1.
 1756%
 1757%   @see complements http:update_cookies/3.
 1758%   @see library(http/http_cookie) implements cookie handling on
 1759%   top of these hooks.
 1760
 1761%!  http:update_cookies(+CookieData, +Parts, +Options) is semidet.
 1762%
 1763%   Update the cookie database.  CookieData  is   the  value  of the
 1764%   =|Set-Cookie|= field, Parts is  the   broken-down  request  (see
 1765%   uri_components/2) and Options is the list   of options passed to
 1766%   http_open.
 1767%
 1768%   @see complements http:write_cookies
 1769%   @see library(http/http_cookies) implements cookie handling on
 1770%   top of these hooks.