versioned_graph/commit

split up code over multiple files

author	Jacco van Ossenbruggen
	Mon Jul 16 16:59:22 2012 +0200
committer	Jacco van Ossenbruggen
	Mon Jul 16 16:59:22 2012 +0200
commit	8f1cb26b0b8bdd7a375656b3eddc6c3dbf22a72f
tree	c59c7b459dfe3d35d0a3973dacadc59c1e07232a
parent	d34c71b09962d0deaaadef3cbc48d0432478ef63
Diff style: patch stat
diff --git a/lib/graph_version.pl b/lib/graph_version.pl
index a979366..9724713 100644
--- a/lib/graph_version.pl
+++ b/lib/graph_version.pl
@@ -6,7 +6,6 @@
 	   gv_resource_commit/4,
 	   gv_head/1,
 	   gv_hash_uri/2,
-	   gv_compute_hash/2,
 	   gv_copy_graph/2,
 	   gv_graph_triples/2,
 	   gv_commit_property/2,
@@ -20,6 +19,10 @@
 :- use_module(library(settings)).
 :- use_module(library(git)).
 
+:- use_module(url_to_filename).
+:- use_module(hash_atom).
+:- use_module(parse_git_objects).
+
 :- rdf_register_ns(gv,       'http://semanticweb.cs.vu.nl/graph/version/').
 :- rdf_register_ns(hash,     'http://semanticweb.cs.vu.nl/graph/hash/').
 :- rdf_register_ns(localgit, 'http://localhost/git/').
@@ -38,6 +41,26 @@
 :- listen(settings(changed(graph_version:_Setting, _Old, _New)),
 	  gv_init).
 
+%%	gv_hash_uri(+Hash, -URI) is det.
+%
+%	URI is a uri constructed by concatenating the
+%	Hash with some additional prefix to make it a
+%	legal URI.
+%
+%	This provides a basic one to one mapping between git's SHA1 hash
+%	ids and the URIs used in RDF.
+
+gv_hash_uri(Hash, URI) :-
+	nonvar(Hash), Hash \= null,	% forward mode: Hash is given and is not null
+	!,
+	atom_concat(x, Hash, Local),	% local name is x followed by the hash
+	rdf_global_id(hash:Local, URI).
+
+gv_hash_uri(Hash, URI) :-
+	nonvar(URI),!,			% reverse mode: relate Hash to a given URI
+	rdf_global_id(hash:Local, URI),
+	atom_concat(x, Hash, Local).	% Hash is Local without its leading x
+
 %%	git_init is det.
 %
 %       Initialise the RDF and/or GIT version repositories.
@@ -97,18 +120,12 @@ gv_current_branch(Branch) :-
 	sub_atom(RefNL, 0, _, 1, Ref),
 	rdf_global_id(localgit:Ref, Branch).
 
-gv_current_branch(Branch) :-
-	\+ setting(gv_refs_store, rdf_only),
-	% as above, but without using git itself.
-	% Assume current branch is in git file HEAD:
-	setting(gv_git_dir, Dir),
-	directory_file_path(Dir, '.git', DotDir),
-	directory_file_path(DotDir, 'HEAD', HEAD),
-	read_file_to_codes(HEAD, Codes, []),
-	atom_codes(Atom, Codes),  %'ref: refs/heads/master\n'
-	sub_atom(Atom, 5,_,1, Ref),
-	rdf_global_id(localgit:Ref, Branch).
-
+%%	gv_commit_property(+Commit, -Prop) is det.
+%
+%	True if Prop unifies with a property of Commit.
+%	Prop is of the form property_name(property_value).
+%
+%
 
 gv_commit_property(null, tree(null)).
 
@@ -133,6 +150,9 @@ gv_commit_property(Commit, RDFProp) :-
 	;   memberchk(RDFPred, [committer_name, committer_date, committer_email])
 	->  option(committer(C), CommitObject),
 	    option(RDFProp, C)
+	;   memberchk(RDFPred, [author_name, author_date, author_email])
+	->  option(author(C), CommitObject),
+	    option(RDFProp, C)
 	).
 
 gv_diff(Commit1, null, Changed, OnlyIn1, OnlyIn2, Same) :-
@@ -259,16 +279,17 @@ gv_move_head_(NewHead) :-
 %	The action is commited by creating a Commit object, this object
 %	links with:
 %	* gv:parent to the previous commit
-%	* gv:tree to the tree representation of the current
-%	  version graphs
+%	* gv:tree to the tree representation of the current set of
+%	  versioned graphs
 %	* gv:committer_name to Committer
-%	* gv:author_name to Committer
-%	* gv:comment to Comment
 %	* gv:commiter_date to the current time
+%	* gv:author_name to Committer
 %	* gv:author_date to the current time
+%	* gv:comment to Comment
 %
 %	Todo: Fix MT issues, just a mutex is not sufficient.
 %	Needs true git-like branching model?
+%	Fix email handling.
 
 gv_resource_commit(Graph, Committer, Comment, Commit) :-
 	with_mutex(gv_commit_mutex,
@@ -405,7 +426,7 @@ gv_add_blob_to_tree(Tree, Graph, Uri, NewTree, Options) :-
 tree_triple_to_git(rdf(S,P,O), Atom) :-
 	rdf_equal(P, gv:blob), % just checking ...
 	gv_hash_uri(Hash, O),
-	my_hash_atom(Codes, Hash),
+	gv_hash_atom(Codes, Hash),
 	url_to_filename(S, Filename),
 	atom_codes(HashCode,Codes),
 	format(atom(A), '100644 ~w\u0000', [Filename]),
@@ -417,48 +438,9 @@ git_tree_pair_to_triple([hash(H),name(Senc)], rdf(Sdec,P,O)) :-
 	gv_hash_uri(H,O).
 
 
-%%	gv_compute_hash(+Triples, ?Hash) is det.
-%
-%	True of Hash is a SHA1 hash of the list of Triples.
-%	Hash is computed using the same recipee git uses.
-%	So, if one would run "git hash-object" on the
-%       file containing the canonical turtle serialisation of
-%       Triples, git would generate the same hash.
-
-
-gv_compute_hash(Triples, Hash) :-
-	with_output_to(
-	    atom(Content),
-	    rdf_save_canonical_turtle(
-		stream(current_output),
-		[ expand(triple_in(Triples)),
-		  encoding(wchar_t)])),
-	write_length(Content, Clen, []),
-	format(atom(Out), 'blob ~d\u0000~w', [Clen, Content]),
-	sha_hash(Out, Sha, []),
-	hash_atom(Sha, Hash).
-
-triple_in(RDF, S,P,O,_G) :-
-	member(rdf(S,P,O), RDF).
 
-%%	gv_hash_uri(+Hash, -URI) is det.
-%
-%	URI is a uri constructed by concatenating the
-%	Hash with some additional prefix to make it a
-%	legal URI.
 
 
-gv_hash_uri(Hash, URI) :-
-	ground(Hash), Hash \= null,
-	!,
-	atom_concat(x, Hash, Local),
-	rdf_global_id(hash:Local, URI).
-gv_hash_uri(Hash, URI) :-
-	var(Hash),
-	nonvar(URI),
-	rdf_global_id(hash:Local, URI),
-	atom_concat(x, Hash, Local).
-
 %%	gv_copy_graph(+Source, +Target) is det.
 %
 %	Copy graph Source to graph Target.
@@ -518,239 +500,3 @@ gv_tree_triples(Tree, Triples) :-
 	      fail),
 	phrase(tree(TreeObject), Codes),
 	maplist(git_tree_pair_to_triple, TreeObject, Triples).
-
-
-%%	url_to_filename(+URL, -FileName) is det.
-%%	url_to_filename(-URL, +FileName) is det.
-%
-%	Turn  a  valid  URL  into  a  filename.  Earlier  versions  used
-%	www_form_encode/2, but this can produce  characters that are not
-%	valid  in  filenames.  We  will  use    the   same  encoding  as
-%	www_form_encode/2,  but  using  our  own    rules   for  allowed
-%	characters. The only requirement is that   we avoid any filename
-%	special character in use.  The   current  encoding  use US-ASCII
-%	alnum characters, _ and %
-%
-%	Code copied from rdf_persistency:url_to_filename/2
-%	on July 16 2012.
-
-url_to_filename(URL, FileName) :-
-	atomic(URL), !,
-	atom_codes(URL, Codes),
-	phrase(url_encode(EncCodes), Codes),
-	atom_codes(FileName, EncCodes).
-url_to_filename(URL, FileName) :-
-	www_form_encode(URL, FileName).
-
-url_encode([0'+|T]) -->
-	" ", !,
-        url_encode(T).
-url_encode([C|T]) -->
-	alphanum(C), !,
-	url_encode(T).
-url_encode([C|T]) -->
-	no_enc_extra(C), !,
-	url_encode(T).
-url_encode(Enc) -->
-	(   "\r\n"
-	;   "\n"
-	), !,
-	{ append("%0D%0A", T, Enc)
-	},
-	url_encode(T).
-url_encode([]) -->
-	eos, !.
-url_encode([0'%,D1,D2|T]) -->
-	[C],
-	{ Dv1 is (C>>4 /\ 0xf),
-	  Dv2 is (C /\ 0xf),
-	  code_type(D1, xdigit(Dv1)),
-	  code_type(D2, xdigit(Dv2))
-	},
-	url_encode(T).
-
-eos([], []).
-
-alphanum(C) -->
-	[C],
-	{ C < 128,			% US-ASCII
-	  code_type(C, alnum)
-	}.
-
-no_enc_extra(0'_) --> "_".
-
-
-
-%%	my_hash_atom(+Codes, -Hash) is det.
-%       my_hash_atom(-Codes, +Hash) is det.
-%
-%       Bi-directional version of hash_atom/2 ...
-%
-my_hash_atom(Codes, Hash) :-
-	nonvar(Codes),
-	!,
-	hash_atom(Codes, Hash).
-
-my_hash_atom(Codes, Hash) :-
-	nonvar(Hash),
-	atom_chars(Hash, Chars),
-	phrase(hex_bytes(Chars), Codes).
-
-hex_bytes([High,Low|T]) -->
-	{ char_type(High, xdigit(H)),
-	  char_type(Low,  xdigit(L)),
-	  Code is 16*H + L
-	},
-	[Code],
-	hex_bytes(T).
-hex_bytes([]) --> [].
-
-commit(Commit) -->
-	tree_line(T),
-	parent(P),
-	author(AName, AEmail, ADate),
-	committer(CName, CEmail, CDate),
-	comment(CM),!,
-	{
-	 Commit = [
-		   tree(T),
-		   parent(P),
-		   author([ author_name(AName),
-			    author_email(AEmail),
-			    author_date(ADate)
-			  ]),
-		   committer([committer_name(CName),
-			      committer_email(CEmail),
-			      committer_date(CDate)]),
-		   comment(CM)
-		  ]
-	}.
-
-tree_line(T) -->
-	[116, 114, 101, 101, 32],
-	hash(T),
-	[10].
-
-parent(P) -->
-	[112, 97, 114, 101, 110, 116, 32],
-	hash(P),
-	[10].
-parent(null) --> [].
-
-author(Name,Email,Date) -->
-	[97, 117, 116, 104, 111, 114, 32],
-	name(NameC),
-	[32, 60], author_email(EmailC), [62, 32],
-	author_date(DateC,_ZoneC),
-	[10],
-	{
-	 atom_codes(Name, NameC),
-	 atom_codes(Email, EmailC),
-	 atom_codes(Date, DateC)
-	}.
-
-
-committer(Name,Email,Date) -->
-	[99, 111, 109, 109, 105, 116, 116, 101, 114, 32],
-	name(NameC),
-	[32, 60], author_email(EmailC), [62, 32],
-	author_date(DateC,_ZoneC),
-	[10],
-	{
-	 atom_codes(Name, NameC),
-	 atom_codes(Email, EmailC),
-	 atom_codes(Date, DateC)
-	}.
-
-
-name([N|T]) -->
-	name_char(N),
-	name(T).
-name([]) --> [].
-
-author_email([N|T]) -->
-	email_char(N),
-	author_email(T).
-author_email([]) --> [].
-
-author_date(S,Z) -->
-	xdigits(S),
-	[32,43],
-	xdigits(Z).
-
-name_char(N) -->
-	[N],
-	{
-	 N \= 60,
-	 N \= 10
-	}.
-email_char(N) -->
-	[N],
-	{
-	 N \= 62
-	}.
-
-comment(C) -->
-	[10],
-	comment_chars(Codes),
-	{
-	 atom_codes(Atom, Codes),
-	 sub_atom(Atom, 0, _, 1, C) % strip of last \n
-	}.
-comment_chars([C|T]) -->
-	comment_char(C), !,
-	comment_chars(T).
-comment_chars([]) --> [].
-
-comment_char(C) -->
-	[C],
-	{
-	 C \= eos
-	}.
-
-end_of_lines -->
-	[10], end_of_lines.
-end_of_lines -->
-	[].
-
-hash(H) -->
-	xdigits(D),
-	{ atom_codes(H,D) }.
-
-xdigits([D|T]) -->
-        xdigit(D), !,
-        xdigits(T).
-xdigits([]) -->
-        [].
-
-xdigit(E) -->
-        [E],
-        { code_type(E, xdigit(_))
-        }.
-
-
-
-
-
-tree([H|T]) -->
-	blobline(H),
-	tree(T).
-tree([]) --> [].
-
-blobline(Blob) -->
-	mode,
-	myblob,
-	hash(Hash),
-	[09],
-	name(NameCodes),
-	[10],
-	{ atom_codes(Name, NameCodes),
-	  Blob = [hash(Hash),
-		  name(Name)] }.
-
-mode --> % 100644 space
-	[49, 48, 48,54,52,52,32].
-
-myblob -->
-	[98, 108, 111, 98, 32].
-
diff --git a/lib/hash_atom.pl b/lib/hash_atom.pl
new file mode 100644
index 0000000..432a03a
--- /dev/null
+++ b/lib/hash_atom.pl
@@ -0,0 +1,27 @@
+:- module(gv_hash_atom, [
+			 gv_hash_atom/2
+			]).
+
+%%      gv_hash_atom(+Codes, -Hash) is det.
+%%      gv_hash_atom(-Codes, +Hash) is det.
+%
+%       Bi-directional version of hash_atom/2 ...
+%
+gv_hash_atom(Codes, Hash) :-
+        nonvar(Codes),                  % Codes given: defer to hash_atom/2
+        !,
+        hash_atom(Codes, Hash).
+
+gv_hash_atom(Codes, Hash) :-
+        nonvar(Hash),                   % Hash given: decode its hex digits pairwise
+        atom_chars(Hash, Chars),
+        phrase(hex_bytes(Chars), Codes).
+
+hex_bytes([High,Low|T]) -->             % each pair of hex digit chars yields one byte value
+        { char_type(High, xdigit(H)),
+          char_type(Low,  xdigit(L)),
+          Code is 16*H + L
+        },
+        [Code],
+        hex_bytes(T).
+hex_bytes([]) --> [].                   % no digits left: no more bytes
diff --git a/lib/parse_git_objects.pl b/lib/parse_git_objects.pl
new file mode 100644
index 0000000..a169edd
--- /dev/null
+++ b/lib/parse_git_objects.pl
@@ -0,0 +1,156 @@
+:- module(gv_parse_git_objects,
+	  [
+	  commit//1,
+	  tree//1
+	  ]).
+
+
+commit(Commit) -->
+	tree_line(T),
+	parent(P),
+	author(AName, AEmail, ADate),
+	committer(CName, CEmail, CDate),
+	comment(CM),!,
+	{
+	 Commit = [
+		   tree(T),
+		   parent(P),
+		   author([ author_name(AName),
+			    author_email(AEmail),
+			    author_date(ADate)
+			  ]),
+		   committer([committer_name(CName),
+			      committer_email(CEmail),
+			      committer_date(CDate)]),
+		   comment(CM)
+		  ]
+	}.
+
+tree_line(T) -->		% the "tree <hash>" line; T is the hash as an atom
+	[116, 114, 101, 101, 32],	% "tree "
+	hash(T),
+	[10].				% newline
+
+parent(P) -->			% optional "parent <hash>" line; P is the hash as an atom
+	[112, 97, 114, 101, 110, 116, 32],	% "parent "
+	hash(P),
+	[10].				% newline
+parent(null) --> [].		% no parent line: P is null; at most one parent line is parsed
+
+author(Name,Email,Date) -->	% the "author Name <Email> Date Zone" line; results are atoms
+	[97, 117, 116, 104, 111, 114, 32],	% "author "
+	name(NameC),
+	[32, 60], author_email(EmailC), [62, 32],	% " <" Email "> "
+	author_date(DateC,_ZoneC),	% the zone is parsed but discarded
+	[10],				% newline
+	{
+	 atom_codes(Name, NameC),
+	 atom_codes(Email, EmailC),
+	 atom_codes(Date, DateC)
+	}.
+
+
+committer(Name,Email,Date) -->	% the "committer Name <Email> Date Zone" line; results are atoms
+	[99, 111, 109, 109, 105, 116, 116, 101, 114, 32],	% "committer "
+	name(NameC),
+	[32, 60], author_email(EmailC), [62, 32],	% " <" Email "> "
+	author_date(DateC,_ZoneC),	% the zone is parsed but discarded
+	[10],				% newline
+	{
+	 atom_codes(Name, NameC),
+	 atom_codes(Email, EmailC),
+	 atom_codes(Date, DateC)
+	}.
+
+
+name([N|T]) -->
+	name_char(N),
+	name(T).
+name([]) --> [].
+
+author_email([N|T]) -->
+	email_char(N),
+	author_email(T).
+author_email([]) --> [].
+
+author_date(S,Z) -->		% S: timestamp digits, Z: zone digits (sign not included)
+	xdigits(S),
+	[32], ( [43] ; [45] ),	% space, then zone sign "+" or "-" (e.g. -0500)
+	xdigits(Z).
+
+name_char(N) -->
+	[N],
+	{
+	 N \= 60,
+	 N \= 10
+	}.
+email_char(N) -->
+	[N],
+	{
+	 N \= 62
+	}.
+
+comment(C) -->			% separator newline followed by the message text
+	[10],
+	comment_chars(Codes),
+	{
+	 atom_codes(Atom, Codes),
+	 sub_atom(Atom, 0, _, 1, C) % strip off the last character (expected to be \n)
+	}.
+comment_chars([C|T]) -->
+	comment_char(C), !,
+	comment_chars(T).
+comment_chars([]) --> [].
+
+comment_char(C) -->
+	[C],
+	{
+	 C \= eos	% NOTE(review): presumably C is a character code, so this never fails - confirm
+	}.
+
+end_of_lines -->
+	[10], end_of_lines.
+end_of_lines -->
+	[].
+
+hash(H) -->
+	xdigits(D),
+	{ atom_codes(H,D) }.
+
+xdigits([D|T]) -->
+        xdigit(D), !,
+        xdigits(T).
+xdigits([]) -->
+        [].
+
+xdigit(E) -->
+        [E],
+        { code_type(E, xdigit(_))
+        }.
+
+
+
+
+
+tree([H|T]) -->
+	blobline(H),
+	tree(T).
+tree([]) --> [].
+
+blobline(Blob) -->		% one tree entry: mode, "blob ", hash, tab, name, newline
+	mode,
+	myblob,
+	hash(Hash),
+	[09],			% tab
+	name(NameCodes),
+	[10],			% newline
+	{ atom_codes(Name, NameCodes),
+	  Blob = [hash(Hash),
+		  name(Name)] }.
+
+mode --> % the literal "100644 " (digits plus a space); no other mode is accepted
+	[49, 48, 48,54,52,52,32].
+
+myblob -->			% the literal "blob " (word plus a space)
+	[98, 108, 111, 98, 32].
+
diff --git a/lib/url_to_filename.pl b/lib/url_to_filename.pl
new file mode 100644
index 0000000..1b4722a
--- /dev/null
+++ b/lib/url_to_filename.pl
@@ -0,0 +1,62 @@
+:- module(gv_url_to_filename,
+	  [
+	  url_to_filename/2
+	  ]).
+%%	url_to_filename(+URL, -FileName) is det.
+%%	url_to_filename(-URL, +FileName) is det.
+%
+%	Turn  a  valid  URL  into  a  filename.  Earlier  versions  used
+%	www_form_encode/2, but this can produce  characters that are not
+%	valid  in  filenames.  We  will  use    the   same  encoding  as
+%	www_form_encode/2,  but  using  our  own    rules   for  allowed
+%	characters. The only requirement is that   we avoid any filename
+%	special character in use.  The   current  encoding  use US-ASCII
+%	alnum characters, _ and %
+%
+%	Code copied from rdf_persistency:url_to_filename/2
+%	on July 16 2012.
+
+url_to_filename(URL, FileName) :-
+	atomic(URL), !,			% URL given: encode it with url_encode//1
+	atom_codes(URL, Codes),
+	phrase(url_encode(EncCodes), Codes),
+	atom_codes(FileName, EncCodes).
+url_to_filename(URL, FileName) :-	% otherwise (presumably URL unbound): decode FileName
+	www_form_encode(URL, FileName).
+
+url_encode([0'+|T]) -->
+	" ", !,
+        url_encode(T).
+url_encode([C|T]) -->
+	alphanum(C), !,
+	url_encode(T).
+url_encode([C|T]) -->
+	no_enc_extra(C), !,
+	url_encode(T).
+url_encode(Enc) -->		% a CR-LF or bare LF line break is emitted as %0D%0A
+	(   "\r\n"
+	;   "\n"
+	), !,
+	{ atom_codes('%0D%0A', CRLF),	% a code list whatever the double_quotes flag is
+	  append(CRLF, T, Enc) },
+	url_encode(T).
+url_encode([]) -->
+	eos, !.
+url_encode([0'%,D1,D2|T]) -->
+	[C],
+	{ Dv1 is (C>>4 /\ 0xf),
+	  Dv2 is (C /\ 0xf),
+	  code_type(D1, xdigit(Dv1)),
+	  code_type(D2, xdigit(Dv2))
+	},
+	url_encode(T).
+
+eos([], []).
+
+alphanum(C) -->
+	[C],
+	{ C < 128,			% US-ASCII
+	  code_type(C, alnum)
+	}.
+
+no_enc_extra(0'_) --> "_".