View source with raw comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2020, VU University Amsterdam
    7                         CWI, Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(hashtable,
   37          [ ht_new/1,                   % --HT
   38            ht_is_hashtable/1,          % @HT
   39            ht_size/2,                  % +HT, -Size
   40
   41            ht_put/3,                   % !HT, +Key, +Value
   42            ht_update/4,                % +HT, +Key, ?Old, +New
   43            ht_put_new/3,               % !HT, +Key, +Value
   44            ht_put/5,                   % !HT, +Key, +Value, +IfNew, -Old
   45            ht_del/3,                   % !HT, +Key, -Value
   46
   47            ht_get/3,                   % +HT, +Key, -Value
   48            ht_gen/3,                   % +HT, ?Key, ?Value
   49            ht_pairs/2,                 % ?HT, ?Pairs
   50            ht_keys/2                   % +HT, -Keys
   51          ]).   52:- autoload(library(error), [must_be/2]).

Hash tables

Hash tables are one of the many key-value representations available to SWI-Prolog.

This module implements a hash table as a mutable and backtrackable data structure. The hash table is implemented as a closed hash table, where the buckets array is implemented using an unbounded arity compound term. Elements in this array are manipulated using setarg/3.

Hash tables allow for any Prolog data types as keys or values, except that the key cannot be a variable. Applications that require a plain variable as key can do so by wrapping all keys in a compound, e.g., k(Var). */

   70/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   71Data structure
   72
   73    ht(Load, Size, Table)
   74- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 ht_new(--HT)
Create a new hash table.
   80ht_new(ht(0,0,[](_))).
 ht_is_hashtable(@HT) is semidet
True when HT is a hash table.
   86ht_is_hashtable(HT) :-
   87    nonvar(HT),
   88    HT = ht(Load, Size, Buckets),
   89    integer(Load),
   90    integer(Size),
   91    compound_name_arity(Buckets, [], Arity),
   92    Arity =:= Size*2+1.
 ht_size(+HT, -Count) is det
True when Size is the number of key-value pairs in HT.
   98ht_size(ht(Count, _Size, _Buckets), Count).
 ht_put(!HT, +Key, +Value) is det
Add a Key-Value to HT. The binding is undone on backtracking.
  104ht_put(HT, Key, Value) :-
  105    must_be(nonvar, Key),
  106    ht_put(HT, Key, Value, _, _, _).
 ht_put_new(!HT, +Key, +Value) is semidet
As ht_put/3, but fails if Key is already in HT instead of updating the associated value.
  113ht_put_new(HT, Key, Value) :-
  114    must_be(nonvar, Key),
  115    ht_put(HT, Key, Value, _, _, true).
 ht_update(+HT, +Key, ?Old, +New) is semidet
True when HT holds Key-Old before and Key-New after this call. Note that it is possible to update to a variable and the instantiate this. For example, a word-count update could be implemented as:
update_word_count(HT, Word) :-
    (   ht_update(HT, Word, Old, New)
    ->  New is Old+1
    ;   ht_put(HT, Word, 1)
    ).
  131ht_update(HT, Key, Old, New) :-
  132    must_be(nonvar, Key),
  133    ht_put(HT, Key, New, _, Old, false).
 ht_put(!HT, +Key, +Value, +IfNew, -Old) is det
Add Key-Value to HT. Old is unified with the old value associated with Key or, if Key is new, with IfNew. This can be used to bootstrap managing a list of values, e.g.
ht_put_list(HT, Key, Value) :-
    ht_put(HT, Key, [Value|Tail], [], Tail).
  144ht_put(HT, Key, Value, IfNew, Old) :-
  145    must_be(nonvar, Key),
  146    ht_put(HT, Key, Value, IfNew, Old, _).
  147
  148ht_put(HT, Key, Value, IfNew, Old, IsNew) :-
  149    HT = ht(Load, Size, Buckets),
  150    (   Load >= Size//2
  151    ->  ht_resize(HT),
  152        ht_put(HT, Key, Value, IfNew, Old, IsNew)
  153    ;   variant_hash(Key, I0),
  154        I is I0 mod Size,
  155        put_(Buckets, I, Size, Key, Old, IfNew, Value, IsNew),
  156        (   IsNew == true
  157        ->  Load2 is Load+1,
  158            setarg(1, HT, Load2)
  159        ;   true
  160        )
  161    ).
  162
  163put_(Buckets, I, Size, Key, Old, IfNew, Value, IsNew) :-
  164    ht_kv(Buckets, I, K, V),
  165    (   var(K)
  166    ->  IsNew = true,
  167        Old = IfNew,
  168        K = Key,
  169        V = Value
  170    ;   K == Key
  171    ->  IsNew = false,
  172        Old = V,
  173        ht_put_v(Buckets, I, Value)
  174    ;   I2 is (I+1) mod Size,
  175        put_(Buckets, I2, Size, Key, Old, IfNew, Value, IsNew)
  176    ).
  177
  178ht_resize(HT) :-
  179    HT = ht(_Load, Size, Buckets),
  180    NewSize is max(4, Size*2),
  181    NewArity is NewSize*2+1,
  182    compound_name_arity(NewBuckets, [], NewArity),
  183    copy_members(0, Size, Buckets, NewSize, NewBuckets),
  184    setarg(2, HT, NewSize),
  185    setarg(3, HT, NewBuckets).
  186
  187copy_members(I, OSize, OBuckets, NSize, NBuckets) :-
  188    I < OSize,
  189    !,
  190    ht_kv(OBuckets, I, K, V),
  191    (   nonvar(K)
  192    ->  variant_hash(K, I0),
  193        NI is I0 mod NSize,
  194        copy_(NBuckets, NI, NSize, K, V)
  195    ;   true
  196    ),
  197    I2 is I+1,
  198    copy_members(I2, OSize, OBuckets, NSize, NBuckets).
  199copy_members(_, _, _, _, _).
  200
  201copy_(Buckets, I, Size, Key, Value) :-
  202    ht_kv(Buckets, I, K, V),
  203    (   var(K)
  204    ->  K = Key,
  205        V = Value
  206    ;   I2 is (I+1) mod Size,
  207        copy_(Buckets, I2, Size, Key, Value)
  208    ).
 ht_del(!HT, +Key, -Value) is semidet
Delete Key-Value from HT. Fails if Key does not appear in HT or Value does not unify with the old associated value.
  216ht_del(HT, Key, Value) :-
  217    must_be(nonvar, Key),
  218    HT = ht(Load, Size, Buckets),
  219    Load > 0,
  220    variant_hash(Key, I0),
  221    I is I0 mod Size,
  222    del_(Buckets, I, Size, Key, Value),
  223    Load2 is Load - 1,
  224    setarg(1, HT, Load2).
  225
  226del_(Buckets, I, Size, Key, Value) :-
  227    ht_kv(Buckets, I, K, V),
  228    (   var(K)
  229    ->  fail
  230    ;   K == Key
  231    ->  V = Value,
  232        ht_put_kv(Buckets, I, _, _),
  233        del_shift(Buckets, I, I, Size)
  234    ;   I2 is (I+1) mod Size,
  235        del_(Buckets, I2, Size, Key, Value)
  236    ).
  237
  238del_shift(Buckets, I0, J, Size) :-
  239    I is (I0+1) mod Size,
  240    ht_kv(Buckets, I, K, V),
  241    (   var(K)
  242    ->  true
  243    ;   variant_hash(K, Hash),
  244        R is Hash mod Size,
  245        (   (   I >= R, R > J
  246            ;   R > J, J > I
  247            ;   J > I, I >= R
  248            )
  249        ->  del_shift(Buckets, I, J, Size)
  250        ;   ht_put_kv(Buckets, J, K, V),
  251            ht_put_kv(Buckets, I, _, _),
  252            del_shift(Buckets, I, I, Size)
  253        )
  254    ).
 ht_get(+HT, +Key, -Value) is semidet
True when Key is in HT and associated with Value.
  261ht_get(ht(Load, Size, Buckets), Key, Value) :-
  262    Load > 0,
  263    must_be(nonvar, Key),
  264    variant_hash(Key, I0),
  265    I is I0 mod Size,
  266    get_(Buckets, I, Size, Key, Value).
  267
  268get_(Buckets, I, Size, Key, Value) :-
  269    ht_kv(Buckets, I, K, V),
  270    (   Key == K
  271    ->  Value = V
  272    ;   nonvar(K)
  273    ->  I2 is (I+1) mod Size,
  274        get_(Buckets, I2, Size, Key, Value)
  275    ).
  276
  277ht_k(Buckets, I, K) :-
  278    IK is I*2+1,
  279    arg(IK, Buckets, K).
  280
  281ht_kv(Buckets, I, K, V) :-
  282    IK is I*2+1,
  283    IV is IK+1,
  284    arg(IK, Buckets, K),
  285    arg(IV, Buckets, V).
  286
  287ht_put_kv(Buckets, I, K, V) :-
  288    IK is I*2+1,
  289    IV is IK+1,
  290    setarg(IK, Buckets, K),
  291    setarg(IV, Buckets, V).
  292
  293ht_put_v(Buckets, I, V) :-
  294    IV is I*2+2,
  295    setarg(IV, Buckets, V).
 ht_gen(+HT, ?Key, ?Value) is nondet
True when Key-Value is in HT. Pairs are enumerated on backtracking using the hash table order.
  302ht_gen(HT, Key, Value) :-
  303    HT = ht(_, Size, Buckets),
  304    End is Size - 1,
  305    between(0, End, I),
  306    ht_kv(Buckets, I, K, V),
  307    nonvar(K),
  308    K = Key,
  309    V = Value.
 ht_pairs(?HT, ?Pairs) is det
True when Pairs and HT represent the same association. When used in mode (+,-), Pairs is an ordered set.
  316ht_pairs(HT, Pairs) :-
  317    ht_is_hashtable(HT),
  318    !,
  319    HT = ht(_Load, Size, Buckets),
  320    pairs_(0, Size, Buckets, Pairs0),
  321    sort(Pairs0, Pairs).
  322ht_pairs(HT, Pairs) :-
  323    must_be(list(pair), Pairs),
  324    ht_new(HT),
  325    ht_fill(Pairs, HT).
  326
  327pairs_(I, Size, Buckets, Pairs) :-
  328    (   I < Size
  329    ->  ht_kv(Buckets, I, K, V),
  330        (   nonvar(K)
  331        ->  Pairs = [K-V|T],
  332            I2 is I+1,
  333            pairs_(I2, Size, Buckets, T)
  334        ;   I2 is I+1,
  335            pairs_(I2, Size, Buckets, Pairs)
  336        )
  337    ;   Pairs = []
  338    ).
  339
  340ht_fill([], _).
  341ht_fill([K-V|T], HT) :-
  342    ht_put(HT, K, V),
  343    ht_fill(T, HT).
 ht_keys(+HT, -Keys) is det
True when Keys is an ordered set of all keys in HT.
  349ht_keys(HT, Keys) :-
  350    HT = ht(_Load, Size, Buckets),
  351    keys_(0, Size, Buckets, Keys0),
  352    sort(Keys0, Keys).
  353
  354keys_(I, Size, Buckets, Keys) :-
  355    (   I < Size
  356    ->  ht_k(Buckets, I, K),
  357        (   nonvar(K)
  358        ->  Keys = [K|T],
  359            I2 is I+1,
  360            keys_(I2, Size, Buckets, T)
  361        ;   I2 is I+1,
  362            keys_(I2, Size, Buckets, Keys)
  363        )
  364    ;   Keys = []
  365    )