View source with formatted comments or as raw
    1/*  Part of SWISH
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2015-2017, VU University Amsterdam
    7			      CWI Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(gitty_driver_files,
   37	  [ gitty_close/1,		% +Store
   38	    gitty_file/4,		% +Store, ?Name, ?Ext, ?Hash
   39
   40	    gitty_update_head/4,	% +Store, +Name, +OldCommit, +NewCommit
   41	    delete_head/2,		% +Store, +Name
   42	    set_head/3,			% +Store, +Name, +Hash
   43	    store_object/4,		% +Store, +Hash, +Header, +Data
   44	    delete_object/2,		% +Store, +Hash
   45
   46	    gitty_hash/2,		% +Store, ?Hash
   47	    load_plain_commit/3,	% +Store, +Hash, -Meta
   48	    load_object/5,		% +Store, +Hash, -Data, -Type, -Size
   49	    gitty_object_file/3,	% +Store, +Hash, -File
   50
   51	    repack_objects/2,           % +Store, +Options
   52            pack_objects/6,             % +Store, +Objs, +Packs, +PackDir,
   53					% -File, +Opts
   54            unpack_packs/1,             % +Store
   55            unpack_pack/2,              % +Store, +PackFile
   56
   57            attach_pack/2,		% +Store, +PackFile
   58            gitty_fsck/1,               % +Store
   59            fsck_pack/1,                % +PackFile
   60            load_object_from_pack/4,	% +Hash, -Data, -Type, -Size
   61
   62	    gitty_rescan/1		% Store
   63	  ]).   64:- use_module(library(apply)).   65:- use_module(library(zlib)).   66:- use_module(library(filesex)).   67:- use_module(library(lists)).   68:- use_module(library(apply)).   69:- use_module(library(error)).   70:- use_module(library(debug)).   71:- use_module(library(zlib)).   72:- use_module(library(hash_stream)).   73:- use_module(library(option)).   74:- use_module(library(dcg/basics)).   75
   76/** <module> Gitty plain files driver
   77
   78This version of the driver uses plain files  to store the gitty data. It
   79consists of a nested directory  structure   with  files  named after the
   80hash. Objects and hash computation is the same as for `git`. The _heads_
   81(files) are computed on startup by scanning all objects. There is a file
   82=ref/head= that is updated if a head is updated. Other clients can watch
   83this file and update their notion  of   the  head. This implies that the
   84store can handle multiple clients that can  access a shared file system,
   85optionally shared using NFS from different machines.
   86
   87The store is simple and robust. The  main disadvantages are long startup
   88times as the store holds more objects and relatively high disk usage due
   89to rounding the small objects to disk allocation units.
   90
   91@bug	Shared access does not work on Windows.
   92*/
   93
   94:- dynamic
   95    head/4,				% Store, Name, Ext, Hash
   96    store/2,				% Store, Updated
   97    commit/3,				% Store, Hash, Meta
   98    heads_input_stream_cache/2,		% Store, Stream
   99    pack_object/6,                      % Hash, Type, Size, Offset, Store,PackFile
  100    attached_packs/1,                   % Store
  101    attached_pack/2.                    % Store, PackFile
  102
  103:- volatile
  104    head/4,
  105    store/2,
  106    commit/3,
  107    heads_input_stream_cache/2,
  108    pack_object/6,
  109    attached_packs/1,
  110    attached_pack/2.  111
  112:- multifile
  113    gitty:check_object/4.  114
  115% enable/disable syncing remote servers running on  the same file store.
  116% This facility requires shared access to files and thus doesn't work on
  117% Windows.
  118
  119:- if(current_prolog_flag(windows, true)).  120remote_sync(false).
  121:- else.  122remote_sync(true).
  123:- endif.  124
  125%!  gitty_close(+Store) is det.
  126%
  127%   Close resources associated with a store.
  128
  129gitty_close(Store) :-
  130    (   retract(heads_input_stream_cache(Store, In))
  131    ->  close(In)
  132    ;   true
  133    ),
  134    retractall(head(Store,_,_,_)),
  135    retractall(store(Store,_)),
  136    retractall(pack_object(_,_,_,_,Store,_)).
  137
  138
  139%%	gitty_file(+Store, ?File, ?Ext, ?Head) is nondet.
  140%
  141%	True when File entry in the  gitty   store  and Head is the HEAD
  142%	revision.
  143
  144gitty_file(Store, Head, Ext, Hash) :-
  145	gitty_scan(Store),
  146	head(Store, Head, Ext, Hash).
  147
  148%%	load_plain_commit(+Store, +Hash, -Meta:dict) is semidet.
  149%
  150%	Load the commit data as a  dict.   Loaded  commits are cached in
  151%	commit/3.  Note  that  only  adding  a  fact  to  the  cache  is
  152%	synchronized. This means that during  a   race  situation we may
  153%	load the same object  multiple  times   from  disk,  but this is
  154%	harmless while a lock  around   the  whole  predicate serializes
  155%	loading different objects, which is not needed.
  156
  157load_plain_commit(Store, Hash, Meta) :-
  158	must_be(atom, Store),
  159	must_be(atom, Hash),
  160	commit(Store, Hash, Meta), !.
  161load_plain_commit(Store, Hash, Meta) :-
  162	load_object(Store, Hash, String, _, _),
  163	term_string(Meta0, String, []),
  164	with_mutex(gitty_commit_cache,
  165		   assert_cached_commit(Store, Hash, Meta0)),
  166	Meta = Meta0.
  167
  168assert_cached_commit(Store, Hash, Meta) :-
  169	commit(Store, Hash, Meta0), !,
  170	assertion(Meta0 =@= Meta).
  171assert_cached_commit(Store, Hash, Meta) :-
  172	assertz(commit(Store, Hash, Meta)).
  173
  174%%	store_object(+Store, +Hash, +Header:string, +Data:string) is det.
  175%
  176%	Store the actual object. The store  must associate Hash with the
  177%	concatenation of Hdr and Data.
  178
  179store_object(Store, Hash, _Hdr, _Data) :-
  180        pack_object(Hash, _Type, _Size, _Offset, Store, _Pack), !.
  181store_object(Store, Hash, Hdr, Data) :-
  182        gitty_object_file(Store, Hash, Path),
  183        with_mutex(gitty_file, exists_or_create(Path, Out0)),
  184	(   var(Out0)
  185	->  true
  186	;   setup_call_cleanup(
  187		zopen(Out0, Out, [format(gzip)]),
  188		format(Out, '~s~s', [Hdr, Data]),
  189		close(Out))
  190	).
  191
  192exists_or_create(Path, _Out) :-
  193	exists_file(Path), !.
  194exists_or_create(Path, Out) :-
  195        file_directory_name(Path, Dir),
  196        make_directory_path(Dir),
  197        open(Path, write, Out, [encoding(utf8), lock(write)]).
  198
  199ensure_directory(Dir) :-
  200	exists_directory(Dir), !.
  201ensure_directory(Dir) :-
  202	make_directory(Dir).
  203
  204%%	load_object(+Store, +Hash, -Data, -Type, -Size) is det.
  205%
  206%	Load the given object.
  207
  208load_object(_Store, Hash, Data, Type, Size) :-
  209        load_object_from_pack(Hash, Data0, Type0, Size0), !,
  210        f(Data0, Type0, Size0) = f(Data, Type, Size).
  211load_object(Store, Hash, Data, Type, Size) :-
  212	gitty_object_file(Store, Hash, Path),
  213        exists_file(Path),
  214	setup_call_cleanup(
  215	    gzopen(Path, read, In, [encoding(utf8)]),
  216	    read_object(In, Data, Type, Size),
  217	    close(In)).
  218
  219%!	load_object_header(+Store, +Hash, -Type, -Size) is det.
  220%
  221%	Load the header of an object
  222
  223load_object_header(Store, Hash, Type, Size) :-
  224	gitty_object_file(Store, Hash, Path),
  225	setup_call_cleanup(
  226	    gzopen(Path, read, In, [encoding(utf8)]),
  227	    read_object_hdr(In, Type, Size),
  228	    close(In)).
  229
  230read_object(In, Data, Type, Size) :-
  231	read_object_hdr(In, Type, Size),
  232	read_string(In, _, Data).
  233
  234read_object_hdr(In, Type, Size) :-
  235	get_code(In, C0),
  236	read_hdr(C0, In, Hdr),
  237	phrase((nonblanks(TypeChars), " ", integer(Size)), Hdr),
  238	atom_codes(Type, TypeChars).
  239
  240read_hdr(C, In, [C|T]) :-
  241	C > 0, !,
  242	get_code(In, C1),
  243	read_hdr(C1, In, T).
  244read_hdr(_, _, []).
  245
  246%%	gitty_rescan(?Store) is det.
  247%
  248%	Update our view of the shared   storage  for all stores matching
  249%	Store.
  250
  251gitty_rescan(Store) :-
  252	retractall(store(Store, _)).
  253
  254%%	gitty_scan(+Store) is det.
  255%
  256%	Scan gitty store for files (entries),   filling  head/3. This is
  257%	performed lazily at first access to the store.
  258%
  259%	@tdb	Possibly we need to maintain a cached version of this
  260%		index to avoid having to open all objects of the gitty
  261%		store.
  262
  263gitty_scan(Store) :-
  264	store(Store, _), !,
  265	remote_updates(Store).
  266gitty_scan(Store) :-
  267	with_mutex(gitty, gitty_scan_sync(Store)).
  268
  269:- thread_local
  270	latest/3.  271
  272gitty_scan_sync(Store) :-
  273	store(Store, _), !.
  274gitty_scan_sync(Store) :-
  275	remote_sync(true), !,
  276        gitty_attach_packs(Store),
  277	restore_heads_from_remote(Store).
  278gitty_scan_sync(Store) :-
  279        gitty_attach_packs(Store),
  280	read_heads_from_objects(Store).
  281
  282%%	read_heads_from_objects(+Store) is det.
  283%
  284%       Establish the head(Store,File,Ext,Hash) relation  by reading all
  285%       objects and adding a fact for the most recent commit.
  286
  287read_heads_from_objects(Store) :-
  288	gitty_scan_latest(Store),
  289	forall(retract(latest(Name, Hash, _Time)),
  290	       assert_head(Store, Name, Hash)),
  291	get_time(Now),
  292	assertz(store(Store, Now)).
  293
  294assert_head(Store, Name, Hash) :-
  295	file_name_extension(_, Ext, Name),
  296        assertz(head(Store, Name, Ext, Hash)).
  297
  298
  299%%	gitty_scan_latest(+Store)
  300%
  301%	Scans the gitty store, extracting  the   latest  version of each
  302%	named entry.
  303
  304gitty_scan_latest(Store) :-
  305	retractall(head(Store, _, _, _)),
  306	retractall(latest(_, _, _)),
  307	(   gitty_hash(Store, Hash),
  308	    load_object(Store, Hash, Data, commit, _Size),
  309	    term_string(Meta, Data, []),
  310	    _{name:Name, time:Time} :< Meta,
  311	    (	latest(Name, _, OldTime),
  312		OldTime > Time
  313	    ->	true
  314	    ;	retractall(latest(Name, _, _)),
  315		assertz(latest(Name, Hash, Time))
  316	    ),
  317	    fail
  318	;   true
  319	).
  320
  321
  322%%	gitty_hash(+Store, ?Hash) is nondet.
  323%
  324%	True when Hash is an object in the store.
  325
  326gitty_hash(Store, Hash) :-
  327	var(Hash), !,
  328        (   gitty_attach_packs(Store),
  329            pack_object(Hash, _Type, _Size, _Offset, Store, _Pack)
  330        ;   gitty_file_object(Store, Hash)
  331        ).
  332gitty_hash(Store, Hash) :-
  333        (   gitty_attach_packs(Store),
  334            pack_object(Hash, _Type, _Size, _Offset, Store, _Pack)
  335        ->  true
  336        ;   gitty_object_file(Store, Hash, File),
  337            exists_file(File)
  338        ).
  339
  340gitty_file_object(Store, Hash) :-
  341	access_file(Store, exist),
  342	directory_files(Store, Level0),
  343	member(E0, Level0),
  344	E0 \== '..',
  345	atom_length(E0, 2),
  346	directory_file_path(Store, E0, Dir0),
  347	directory_files(Dir0, Level1),
  348	member(E1, Level1),
  349	E1 \== '..',
  350	atom_length(E1, 2),
  351	directory_file_path(Dir0, E1, Dir),
  352	directory_files(Dir, Files),
  353	member(File, Files),
  354	atom_length(File, 36),
  355	atomic_list_concat([E0,E1,File], Hash).
  356
  357%%	delete_object(+Store, +Hash)
  358%
  359%	Delete an existing object
  360
  361delete_object(Store, Hash) :-
  362	gitty_object_file(Store, Hash, File),
  363	delete_file(File).
  364
  365%!	gitty_object_file(+Store, +Hash, -Path) is det.
  366%
  367%	True when Path is the file  at   which  the  object with Hash is
  368%	stored.
  369
  370gitty_object_file(Store, Hash, Path) :-
  371	sub_string(Hash, 0, 2, _, Dir0),
  372	sub_string(Hash, 2, 2, _, Dir1),
  373	sub_string(Hash, 4, _, 0, File),
  374	atomic_list_concat([Store, Dir0, Dir1, File], /, Path).
  375
  376
  377		 /*******************************
  378		 *	      SYNCING		*
  379		 *******************************/
  380
  381%%	gitty_update_head(+Store, +Name, +OldCommit, +NewCommit) is det.
  382%
  383%	Update the head of a gitty  store   for  Name.  OldCommit is the
  384%	current head and NewCommit is the new  head. If Name is created,
  385%	and thus there is no head, OldCommit must be `-`.
  386%
  387%	This operation can fail because another   writer has updated the
  388%	head.  This can both be in-process or another process.
  389
  390gitty_update_head(Store, Name, OldCommit, NewCommit) :-
  391	with_mutex(gitty,
  392		   gitty_update_head_sync(Store, Name, OldCommit, NewCommit)).
  393
  394gitty_update_head_sync(Store, Name, OldCommit, NewCommit) :-
  395	remote_sync(true), !,
  396	setup_call_cleanup(
  397	    heads_output_stream(Store, HeadsOut),
  398	    gitty_update_head_sync(Store, Name, OldCommit, NewCommit, HeadsOut),
  399	    close(HeadsOut)).
  400gitty_update_head_sync(Store, Name, OldCommit, NewCommit) :-
  401	gitty_update_head_sync2(Store, Name, OldCommit, NewCommit).
  402
  403gitty_update_head_sync(Store, Name, OldCommit, NewCommit, HeadsOut) :-
  404	gitty_update_head_sync2(Store, Name, OldCommit, NewCommit),
  405	format(HeadsOut, '~q.~n', [head(Name, OldCommit, NewCommit)]).
  406
  407gitty_update_head_sync2(Store, Name, OldCommit, NewCommit) :-
  408	gitty_scan(Store),		% fetch remote changes
  409	(   OldCommit == (-)
  410	->  (   head(Store, Name, _, _)
  411	    ->	throw(error(gitty(file_exists(Name),_)))
  412	    ;	assert_head(Store, Name, NewCommit)
  413	    )
  414	;   (   retract(head(Store, Name, _, OldCommit))
  415	    ->	assert_head(Store, Name, NewCommit)
  416	    ;	throw(error(gitty(not_at_head(Name, OldCommit)), _))
  417	    )
  418	).
  419
  420%!	remote_updates(+Store)
  421%
  422%	Watch for remote updates to the store. We only do this if we did
  423%	not do so the last second.
  424
  425:- dynamic
  426	last_remote_sync/2.  427
  428remote_updates(_) :-
  429	remote_sync(false), !.
  430remote_updates(Store) :-
  431	remote_up_to_data(Store), !.
  432remote_updates(Store) :-
  433	with_mutex(gitty, remote_updates_sync(Store)).
  434
  435remote_updates_sync(Store) :-
  436	remote_up_to_data(Store), !.
  437remote_updates_sync(Store) :-
  438	retractall(last_remote_sync(Store, _)),
  439	get_time(Now),
  440	asserta(last_remote_sync(Store, Now)),
  441	remote_update(Store).
  442
  443remote_up_to_data(Store) :-
  444	last_remote_sync(Store, Last),
  445	get_time(Now),
  446	Now-Last < 1.
  447
  448remote_update(Store) :-
  449	remote_updates(Store, List),
  450	maplist(update_head(Store), List).
  451
  452update_head(Store, head(Name, OldCommit, NewCommit)) :-
  453	(   OldCommit == (-)
  454	->  \+ head(Store, Name, _, _)
  455	;   retract(head(Store, Name, _, OldCommit))
  456	), !,
  457	assert_head(Store, Name, NewCommit).
  458update_head(_, _).
  459
  460%%	remote_updates(+Store, -List) is det.
  461%
  462%	Find updates from other gitties  on   the  same filesystem. Note
  463%	that we have to push/pop the input   context to avoid creating a
  464%	notion of an  input  context   which  possibly  relate  messages
  465%	incorrectly to the sync file.
  466
  467remote_updates(Store, List) :-
  468	heads_input_stream(Store, Stream),
  469	setup_call_cleanup(
  470	    '$push_input_context'(gitty_sync),
  471	    read_new_terms(Stream, List),
  472	    '$pop_input_context').
  473
  474read_new_terms(Stream, Terms) :-
  475	read(Stream, First),
  476	read_new_terms(First, Stream, Terms).
  477
  478read_new_terms(end_of_file, _, List) :- !,
  479	List = [].
  480read_new_terms(Term, Stream, [Term|More]) :-
  481	read(Stream, Term2),
  482	read_new_terms(Term2, Stream, More).
  483
  484heads_output_stream(Store, Out) :-
  485	heads_file(Store, HeadsFile),
  486	open(HeadsFile, append, Out,
  487	     [ encoding(utf8),
  488	       lock(exclusive)
  489	     ]).
  490
  491heads_input_stream(Store, Stream) :-
  492	heads_input_stream_cache(Store, Stream0), !,
  493	Stream = Stream0.
  494heads_input_stream(Store, Stream) :-
  495	heads_file(Store, File),
  496	between(1, 2, _),
  497	catch(open(File, read, In,
  498		   [ encoding(utf8),
  499		     eof_action(reset)
  500		   ]),
  501	      _,
  502	      create_heads_file(Store)), !,
  503	assert(heads_input_stream_cache(Store, In)),
  504	Stream = In.
  505
  506create_heads_file(Store) :-
  507	call_cleanup(
  508	    heads_output_stream(Store, Out),
  509	    close(Out)),
  510	fail.					% always fail!
  511
  512heads_file(Store, HeadsFile) :-
  513	ensure_directory(Store),
  514	directory_file_path(Store, ref, RefDir),
  515	ensure_directory(RefDir),
  516	directory_file_path(RefDir, head, HeadsFile).
  517
  518%%	restore_heads_from_remote(Store)
  519%
  520%	Restore the known heads by reading the remote sync file.
  521
  522restore_heads_from_remote(Store) :-
  523	heads_file(Store, File),
  524	exists_file(File),
  525	setup_call_cleanup(
  526	    open(File, read, In, [encoding(utf8)]),
  527	    restore_heads(Store, In),
  528	    close(In)), !,
  529	get_time(Now),
  530	assertz(store(Store, Now)).
  531restore_heads_from_remote(Store) :-
  532	read_heads_from_objects(Store),
  533	heads_file(Store, File),
  534	setup_call_cleanup(
  535	    open(File, write, Out, [encoding(utf8)]),
  536	    save_heads(Store, Out),
  537	    close(Out)), !.
  538
  539restore_heads(Store, In) :-
  540	read(In, Term0),
  541	Term0 = epoch(_),
  542	read(In, Term1),
  543	restore_heads(Term1, In, Store).
  544
  545restore_heads(end_of_file, _, _) :- !.
  546restore_heads(head(File, _, Hash), In, Store) :-
  547	retractall(head(Store, File, _, _)),
  548	assert_head(Store, File, Hash),
  549	read(In, Term),
  550	restore_heads(Term, In, Store).
  551
  552save_heads(Store, Out) :-
  553	get_time(Now),
  554	format(Out, 'epoch(~0f).~n~n', [Now]),
  555	forall(head(Store, File, _, Hash),
  556	       format(Out, '~q.~n', [head(File, -, Hash)])).
  557
  558
  559%%	delete_head(+Store, +Head) is det.
  560%
  561%	Delete Head from Store. Used  by   gitty_fsck/1  to remove heads
  562%	that have no commits. Should  we   forward  this  to remotes, or
  563%	should they do their own thing?
  564
  565delete_head(Store, Head) :-
  566	retractall(head(Store, Head, _, _)).
  567
  568%%	set_head(+Store, +File, +Hash) is det.
  569%
  570%	Set the head of the given File to Hash
  571
  572set_head(Store, File, Hash) :-
  573	file_name_extension(_, Ext, File),
  574        (   head(Store, File, _, Hash0)
  575        ->  (   Hash == Hash0
  576            ->  true
  577            ;   asserta(head(Store, File, Ext, Hash)),
  578                retractall(head(Store, File, _, Hash0))
  579            )
  580        ;   asserta(head(Store, File, Ext, Hash))
  581        ).
  582
  583
  584		 /*******************************
  585		 *	      PACKS		*
  586		 *******************************/
  587
  588/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  589
  590<pack file> := <header>
  591               <file>*
  592<header>    := "gitty(Version).\n" <object>* "end_of_header.\n"
  593<object>    := obj(Hash, Type, Size, FileSize)
  594- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  595
  596pack_version(1).
  597
  598%!  repack_objects(+Store, +Options) is det.
  599%
  600%   Repack  objects  of  Store  for  reduced  disk  usage  and  enhanced
  601%   performance. By default this picks up all  file objects of the store
  602%   and all existing small pack files.  Options:
  603%
  604%     - small_pack(+Bytes)
  605%     Consider all packs with less than Bytes as small and repack them.
  606%     Default 10Mb
  607%     - min_files(+Count)
  608%     Do not repack if there are less than Count new files.
  609%     Default 1,000.
  610
  611:- debug(gitty(pack)).  612
  613repack_objects(Store, Options) :-
  614    option(min_files(MinFiles), Options, 1_000),
  615    findall(Object, gitty_file_object(Store, Object), Objects),
  616    length(Objects, NewFiles),
  617    debug(gitty(pack), 'Found ~D file objects', [NewFiles]),
  618    (   NewFiles >= MinFiles
  619    ->  pack_files(Store, ExistingPacks),
  620        option(small_pack(MaxSize), Options, 10_000_000),
  621        include(small_file(MaxSize), ExistingPacks, PackFiles),
  622        (   debugging(gitty(pack))
  623        ->  length(PackFiles, PackCount),
  624            debug(gitty(pack), 'Found ~D small packs', [PackCount])
  625        ;   true
  626        ),
  627        directory_file_path(Store, pack, PackDir),
  628        make_directory_path(PackDir),
  629        pack_objects(Store, Objects, PackFiles, PackDir, _PackFile, Options)
  630    ;   debug(gitty(pack), 'Nothing to do', [])
  631    ).
  632
  633small_file(MaxSize, File) :-
  634    size_file(File, Size),
  635    Size < MaxSize.
  636
  637%!  pack_objects(+Store, +Objects, +Packs, +PackDir,
  638%!               -PackFile, +Options) is det.
  639%
  640%   Pack the given objects and pack files into a new pack.
  641
  642pack_objects(Store, Objects, Packs, PackDir, PackFile, Options) :-
  643    with_mutex(gitty_pack,
  644	       pack_objects_sync(Store, Objects, Packs, PackDir,
  645                                 PackFile, Options)).
  646
  647pack_objects_sync(_Store, [], [Pack], _, [Pack], _) :-
  648    !.
  649pack_objects_sync(Store, Objects, Packs, PackDir, PackFilePath, Options) :-
  650    length(Objects, ObjCount),
  651    length(Packs, PackCount),
  652    debug(gitty(pack), 'Repacking ~D objects and ~D packs',
  653          [ObjCount, PackCount]),
  654    maplist(object_info(Store), Objects, FileInfo),
  655    maplist(pack_info(Store), Packs, PackInfo),
  656    append([FileInfo|PackInfo], Info0),
  657    sort(1, @<, Info0, Info),           % remove possible duplicates
  658    (   debugging(gitty(pack))
  659    ->  (   PackCount > 0
  660        ->  length(Info, FinalObjCount),
  661            debug(gitty(pack), 'Total ~D objects', [FinalObjCount])
  662        ;   true
  663        )
  664    ;   true
  665    ),
  666    directory_file_path(PackDir, 'pack-create', TmpPack),
  667    setup_call_cleanup(
  668	(   open(TmpPack, write, Out0, [type(binary), lock(write)]),
  669	    open_hash_stream(Out0, Out, [algorithm(sha1)])
  670	),
  671        (   write_signature(Out),
  672            maplist(write_header(Out), Info),
  673            format(Out, 'end_of_header.~n', []),
  674            maplist(add_file(Out, Store), Info),
  675	    stream_hash(Out, SHA1)
  676        ),
  677        close(Out)),
  678    format(atom(PackFile), 'pack-~w.pack', [SHA1]),
  679    directory_file_path(PackDir, PackFile, PackFilePath),
  680    rename_file(TmpPack, PackFilePath),
  681    debug(gitty(pack), 'Attaching ~p', [PackFilePath]),
  682    attach_pack(Store, PackFilePath),
  683    remove_objects_after_pack(Store, Objects, Options),
  684    delete(Packs, PackFilePath, RmPacks),
  685    remove_repacked_packs(Store, RmPacks, Options),
  686    debug(gitty(pack), 'Packing completed', []).
  687
  688object_info(Store, Object, obj(Object, Type, Size, FileSize)) :-
  689    gitty_object_file(Store, Object, File),
  690    load_object_header(Store, Object, Type, Size),
  691    size_file(File, FileSize).
  692
  693pack_info(Store, PackFile, Objects) :-
  694    attach_pack(Store, PackFile),
  695    pack_read_header(PackFile, _Version, _DataOffset, Objects).
  696
  697write_signature(Out) :-
  698    pack_version(Version),
  699    format(Out, "gitty(~d).~n", [Version]).
  700
  701write_header(Out, obj(Object, Type, Size, FileSize)) :-
  702    format(Out, 'obj(~q,~q,~d,~d).~n', [Object, Type, Size, FileSize]).
  703
  704%!  add_file(+Out, +Store, +Object) is det.
  705%
  706%   Add Object from Store to the pack stream Out.
  707
  708add_file(Out, Store, obj(Object, _Type, _Size, _FileSize)) :-
  709    gitty_object_file(Store, Object, File),
  710    exists_file(File),
  711    !,
  712    setup_call_cleanup(
  713        open(File, read, In, [type(binary)]),
  714        copy_stream_data(In, Out),
  715        close(In)).
  716add_file(Out, Store, obj(Object, Type, Size, FileSize)) :-
  717    pack_object(Object, Type, Size, Offset, Store, PackFile),
  718    setup_call_cleanup(
  719        open(PackFile, read, In, [type(binary)]),
  720        (   seek(In, Offset, bof, Offset),
  721            copy_stream_data(In, Out, FileSize)
  722        ),
  723        close(In)).
  724
  725
  726%!  gitty_fsck(+Store) is det.
  727%
  728%   Validate all packs associated with Store
  729
  730gitty_fsck(Store) :-
  731    pack_files(Store, PackFiles),
  732    maplist(fsck_pack, PackFiles).
  733
  734%!  fsck_pack(+File) is det.
  735%
  736%   Validate the integrity of the pack file File.
  737
  738fsck_pack(File) :-
  739    debug(gitty(pack), 'fsck ~p', [File]),
  740    check_pack_hash(File),
  741    debug(gitty(pack), 'fsck ~p: checking objects', [File]),
  742    check_pack_objects(File),
  743    debug(gitty(pack), 'fsck ~p: done', [File]).
  744
  745check_pack_hash(File) :-
  746    file_base_name(File, Base),
  747    file_name_extension(Plain, Ext, Base),
  748    must_be(oneof([pack]), Ext),
  749    atom_concat('pack-', Hash, Plain),
  750    setup_call_cleanup(
  751        (   open(File, read, In0, [type(binary)]),
  752            open_hash_stream(In0, In, [algorithm(sha1)])
  753        ),
  754        (   setup_call_cleanup(
  755                open_null_stream(Null),
  756                copy_stream_data(In, Null),
  757                close(Null)),
  758            stream_hash(In, SHA1)
  759        ),
  760        close(In)),
  761    assertion(Hash == SHA1).
  762
  763check_pack_objects(PackFile) :-
  764    setup_call_cleanup(
  765        open(PackFile, read, In, [type(binary)]),
  766        (  read_header(In, Version, DataOffset, Objects),
  767           set_stream(In, encoding(utf8)),
  768           foldl(check_object(In, PackFile, Version), Objects, DataOffset, _)
  769        ),
  770        close(In)).
  771
  772check_object(In, PackFile, _Version,
  773             obj(Object, Type, Size, FileSize),
  774             Offset0, Offset) :-
  775    Offset is Offset0+FileSize,
  776    byte_count(In, Here),
  777    (   Here == Offset0
  778    ->  true
  779    ;   print_message(warning, pack(reposition(Here, Offset0))),
  780        seek(In, Offset0, bof, Offset0)
  781    ),
  782    (   setup_call_cleanup(
  783            zopen(In, In2, [multi_part(false), close_parent(false)]),
  784            catch(read_object(In2, Data, _0RType, _0RSize), E,
  785                  ( print_message(error,
  786                                  gitty(PackFile, fsck(read_object(Object, E)))),
  787                    fail)),
  788            close(In2))
  789    ->  byte_count(In, End),
  790        (   End == Offset
  791        ->  true
  792        ;   print_message(error,
  793                          gitty(PackFile, fsck(object_end(Object, End,
  794                                                          Offset0, Offset,
  795                                                          Data))))
  796        ),
  797        assertion(Type == _0RType),
  798        assertion(Size == _0RSize),
  799        gitty:check_object(Object, Data, Type, Size)
  800    ;   true
  801    ).
  802
  803
  804%!  gitty_attach_packs(+Store) is det.
  805%
  806%   Attach all packs for Store
  807
  808gitty_attach_packs(Store) :-
  809    attached_packs(Store),
  810    !.
  811gitty_attach_packs(Store) :-
  812    with_mutex(gitty_attach_packs,
  813               gitty_attach_packs_sync(Store)).
  814
  815gitty_attach_packs_sync(Store) :-
  816    attached_packs(Store),
  817    !.
  818gitty_attach_packs_sync(Store) :-
  819    pack_files(Store, PackFiles),
  820    maplist(attach_pack(Store), PackFiles),
  821    asserta(attached_packs(Store)).
  822
  823pack_files(Store, Packs) :-
  824    directory_file_path(Store, pack, PackDir),
  825    exists_directory(PackDir),
  826    !,
  827    directory_files(PackDir, Files),
  828    convlist(is_pack(PackDir), Files, Packs).
  829pack_files(_, []).
  830
  831is_pack(PackDir, File, Path) :-
  832    file_name_extension(_, pack, File),
  833    directory_file_path(PackDir, File, Path).
  834
  835%!  attach_pack(+Store, +PackFile)
  836%
  837%   Load the index of Pack into memory.
  838
  839attach_pack(Store, PackFile) :-
  840    attached_pack(Store, PackFile),
  841    !.
  842attach_pack(Store, PackFile) :-
  843    retractall(pack_object(_,_,_,_,_,PackFile)),
  844    pack_read_header(PackFile, Version, DataOffset, Objects),
  845    foldl(assert_object(Store, PackFile, Version), Objects, DataOffset, _),
  846    assertz(attached_pack(Store, PackFile)).
  847
  848pack_read_header(PackFile, Version, DataOffset, Objects) :-
  849    setup_call_cleanup(
  850        open(PackFile, read, In, [type(binary)]),
  851        read_header(In, Version, DataOffset, Objects),
  852        close(In)).
  853
  854read_header(In, Version, DataOffset, Objects) :-
  855    read(In, Signature),
  856    (   Signature = gitty(Version)
  857    ->  true
  858    ;   domain_error(gitty_pack_file, Objects)
  859    ),
  860    read(In, Term),
  861    read_index(Term, In, Objects),
  862    get_code(In, Code),
  863    assertion(Code == 0'\n),
  864    byte_count(In, DataOffset).
  865
  866read_index(end_of_header, _, []) :-
  867    !.
  868read_index(Object, In, [Object|T]) :-
  869    read(In, Term2),
  870    read_index(Term2, In, T).
  871
  872assert_object(Store, Pack, _Version,
  873              obj(Object, Type, Size, FileSize),
  874              Offset0, Offset) :-
  875    Offset is Offset0+FileSize,
  876    assertz(pack_object(Object, Type, Size, Offset0, Store, Pack)).
  877
  878%!  detach_pack(+Store, +Pack) is det.
  879%
  880%   Remove a pack file from the memory index.
  881
  882detach_pack(Store, Pack) :-
  883    retractall(pack_object(_, _, _, _, Store, Pack)),
  884    retractall(attached_pack(Store, Pack)).
  885
  886%!  load_object_from_pack(+Hash, -Data, -Type, -Size) is semidet.
  887%
  888%   True when Hash is in a pack and can be loaded.
  889
  890load_object_from_pack(Hash, Data, Type, Size) :-
  891    pack_object(Hash, Type, Size, Offset, _Store, Pack),
  892    setup_call_cleanup(
  893        open(Pack, read, In, [type(binary)]),
  894        read_object_at(In, Offset, Data, Type, Size),
  895        close(In)).
  896
  897read_object_at(In, Offset, Data, Type, Size) :-
  898    seek(In, Offset, bof, Offset),
  899    read_object_here(In, Data, Type, Size).
  900
  901read_object_here(In, Data, Type, Size) :-
  902    stream_property(In, encoding(Enc)),
  903    setup_call_cleanup(
  904        ( set_stream(In, encoding(utf8)),
  905          zopen(In, In2, [multi_part(false), close_parent(false)])
  906        ),
  907        read_object(In2, Data, Type, Size),
  908        ( close(In2),
  909          set_stream(In, encoding(Enc))
  910        )).
  911
  912%!  unpack_packs(+Store) is det.
  913%
  914%   Unpack all packs.
  915
  916unpack_packs(Store) :-
  917    absolute_file_name(Store, AbsStore, [file_type(directory),
  918                                         access(read)]),
  919    pack_files(AbsStore, Packs),
  920    maplist(unpack_pack(AbsStore), Packs).
  921
  922%!  unpack_pack(+Store, +Pack) is det.
  923%
  924%   Turn a pack back into a plain object files
  925
  926unpack_pack(Store, PackFile) :-
  927    pack_read_header(PackFile, _Version, DataOffset, Objects),
  928    setup_call_cleanup(
  929        open(PackFile, read, In, [type(binary)]),
  930        foldl(create_file(Store, In), Objects, DataOffset, _),
  931        close(In)),
  932    detach_pack(Store, PackFile),
  933    delete_file(PackFile).
  934
  935create_file(Store, In, obj(Object, _Type, _Size, FileSize), Offset0, Offset) :-
  936    Offset is Offset0+FileSize,
  937    gitty_object_file(Store, Object, Path),
  938    with_mutex(gitty_file, exists_or_recreate(Path, Out)),
  939	(   var(Out)
  940	->  true
  941	;   setup_call_cleanup(
  942                seek(In, Offset0, bof, Offset0),
  943                copy_stream_data(In, Out, FileSize),
  944                close(Out))
  945	).
  946
  947exists_or_recreate(Path, _Out) :-
  948	exists_file(Path), !.
  949exists_or_recreate(Path, Out) :-
  950        file_directory_name(Path, Dir),
  951        make_directory_path(Dir),
  952        open(Path, write, Out, [type(binary), lock(write)]).
  953
  954
  955%!  remove_objects_after_pack(+Store, +Objects, +Options) is det.
  956%
  957%   Remove the indicated (file) objects from Store.
  958
  959remove_objects_after_pack(Store, Objects, Options) :-
  960    debug(gitty(pack), 'Deleting plain files', []),
  961    maplist(delete_object(Store), Objects),
  962    (   option(prune_empty_directories(true), Options, true)
  963    ->  debug(gitty(pack), 'Pruning empty directories', []),
  964        prune_empty_directories(Store)
  965    ;   true
  966    ).
  967
  968%!  remove_repacked_packs(+Store, +Packs, +Options)
  969%
  970%   Remove packs that have been repacked.
  971
  972remove_repacked_packs(Store, Packs, Options) :-
  973    maplist(remove_pack(Store, Options), Packs).
  974
  975remove_pack(Store, _Options, Pack) :-
  976    detach_pack(Store, Pack),
  977    delete_file(Pack).
  978
  979%!  prune_empty_directories(+Dir) is det.
  980%
  981%   Prune directories that are  empty  below   Dir.  Dir  itself  is not
  982%   removed, even if it is empty.
  983
  984prune_empty_directories(Dir) :-
  985    prune_empty_directories(Dir, 0).
  986
  987prune_empty_directories(Dir, Level) :-
  988    directory_files(Dir, AllFiles),
  989    exclude(hidden, AllFiles, Files),
  990    (   Files == [],
  991        Level > 0
  992    ->  delete_directory_async(Dir)
  993    ;   convlist(prune_empty_directories(Dir, Level), Files, Left),
  994        (   Left == [],
  995            Level > 0
  996        ->  delete_directory_async(Dir)
  997        ;   true
  998        )
  999    ).
 1000
 1001hidden(.).
 1002hidden(..).
 1003
 1004prune_empty_directories(Parent, Level0, File, _) :-
 1005    directory_file_path(Parent, File, Path),
 1006    exists_directory(Path),
 1007    !,
 1008    Level is Level0 + 1,
 1009    prune_empty_directories(Path, Level),
 1010    fail.
 1011prune_empty_directories(_, _, File, File).
 1012
 1013delete_directory_async(Dir) :-
 1014    with_mutex(gitty_file, delete_directory_async2(Dir)).
 1015
 1016delete_directory_async2(Dir) :-
 1017    catch(delete_directory(Dir), E,
 1018          (   \+ exists_directory(Dir)
 1019          ->  true
 1020          ;   \+ empty_directory(Dir)
 1021          ->  true
 1022          ;   throw(E)
 1023          )).
 1024
 1025empty_directory(Dir) :-
 1026    directory_files(Dir, AllFiles),
 1027    exclude(hidden, AllFiles, [])