cloud/commit

First version of the datacloud package

authorJan Wielemaker
Fri May 20 15:27:47 2011 +0200
committerJan Wielemaker
Fri May 20 15:27:47 2011 +0200
commit7e5d247da58fbb24af001dd91f6a642ab55e2f8d
tree615245ea8eccc613e3a563b7df6b08fda2dec49c
parent4ef743ac6045f4b96890b79381113451b9501ec7
Diff style: patch stat
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b25c15b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*~
diff --git a/applications/app_cloud.pl b/applications/app_cloud.pl
new file mode 100644
index 0000000..301b682
--- /dev/null
+++ b/applications/app_cloud.pl
@@ -0,0 +1,77 @@
+/*  Part of ClioPatria SeRQL and SPARQL server
+
+    Author:        Jan Wielemaker
+    E-mail:        J.Wielemaker@cs.vu.nl
+    WWW:           http://www.swi-prolog.org
+    Copyright (C): 2011, VU University Amsterdam
+
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License
+    as published by the Free Software Foundation; either version 2
+    of the License, or (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+    As a special exception, if you link this library with other files,
+    compiled with a Free Software compiler, to produce an executable, this
+    library does not by itself cause the resulting executable to be covered
+    by the GNU General Public License. This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+
+:- module(app_cloud, []).
+:- use_module(library(datacloud)).
+:- use_module(library(settings)).
+:- use_module(library(process)).
+:- use_module(library(http/http_dispatch)).
+
+:- http_handler(root('datacloud.svg'), datacloud, [spawn,time_limit(1800)]).
+
+:- setting(cloud:image_file, atom, 'datacloud.svg',
+	   'File for serving the datacloud').
+:- setting(cloud:format, atom, 'svg',
+	   'File for serving the datacloud').
+:- setting(cloud:renderer, oneof([dot,neato,fdp]), 'fdp',
+	   'File for serving the datacloud').
+
+/** <module> Dynamically serve a datacloud
+*/
+
+%%	datacloud(+Request)
+%
+%	Serve an SVG holding the datacloud
+
+datacloud(Request) :-
+	setting(cloud:image_file, File),
+	with_mutex(cloud_server,
+		   make_cloud(File, [])),
+	http_reply_file(File, [], Request).
+
+
+make_cloud(File, _) :-
+	access_file(File, read), !.
+make_cloud(File, Options) :-
+	setup_call_cleanup(tmp_file_stream(utf8, TmpFile, Tmp),
+			   write_cloud_graph(Tmp, Options),
+			   close(Tmp)),
+	call_cleanup(run_dot(TmpFile,File),
+		     delete_file(TmpFile)).
+
+run_dot(In, Out) :-
+	setting(cloud:renderer, Dot),
+	setting(cloud:format, Format),
+	atom_concat('-T', Format, FmtArg),
+	atom_concat('-o', Out, OutArg),
+	process_create(path(Dot),
+		       [FmtArg, OutArg, In],
+		       [
+		       ]).
diff --git a/config-available/cloud.pl b/config-available/cloud.pl
index f446c16..a1680ba 100644
--- a/config-available/cloud.pl
+++ b/config-available/cloud.pl
@@ -3,3 +3,4 @@
 /** <module> Visualize loaded datacloud
 */
 
+:- use_module(application(app_cloud)).
diff --git a/lib/datacloud.pl b/lib/datacloud.pl
new file mode 100644
index 0000000..18ba6e5
--- /dev/null
+++ b/lib/datacloud.pl
@@ -0,0 +1,285 @@
+/*  Part of ClioPatria SeRQL and SPARQL server
+
+    Author:        Victor de Boer
+    WWW:           http://www.few.vu.nl/~vbr240/
+    Copyright (C): VU University Amsterdam
+
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License
+    as published by the Free Software Foundation; either version 2
+    of the License, or (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+    As a special exception, if you link this library with other files,
+    compiled with a Free Software compiler, to produce an executable, this
+    library does not by itself cause the resulting executable to be covered
+    by the GNU General Public License. This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+*/
+
+:- module(datacloud,
+	  [ write_cloud_graph/2		% +Out, +Options
+	  ]).
+:- use_module(library(semweb/rdf_library)).
+:- use_module(library(semweb/rdf_db)).
+:- use_module(library(semweb/rdf_db)).
+:- use_module(library(debug)).
+
+:- thread_local
+	mg/1,
+	graphset/2,
+	linkcounter/4.
+
+/** <module> Create a data-cloud image
+
+Generates a datacloud based on the Manifest's  information in DOT, to be
+visualized in graphviz. A cloud  node  is   defined  as  an entry in the
+manifest files as used by   library(semweb/rdf_library)  that is labeled
+using the type =|lib:CloudNode|= and consists of all the graphs that are
+loaded (indirectly) through this entry.
+*/
+
+%% make_cloud_graph(+Out, +Options)
+%
+%	Generates a dot atom for loaded  manifest files. Options include
+%
+%	  * min_size(Float)
+%	  * max_size(Float)
+%	  Bounds for the node-sizes.  Defaults are 0.5 and 5.
+%	  * unload_manifests(Boolean)
+%	  If =true=, unload the Manifest files from the RDF store
+%	  after processing.
+
+write_cloud_graph(Out, Options):-
+	make_manifests_graph,
+	get_all_cn(CNList),
+	get_sizes(CNList,CNSizeList,Options),
+	debug(cloud, 'Collecting links ...', []),
+	get_links(CNSizeList,Links,Options),
+	debug(cloud, 'Create graph ...', []),
+	format(Out, 'graph DataCloud {~n', []),
+	make_dot(Out, CNSizeList,Links),
+	format(Out, '}~n', []),
+	clean_manifests_graph(Options).
+
+make_dot(Out, CNSizeList,Links):-
+	maplist(dot_node(Out), CNSizeList),
+	maplist(dot_link(Out), Links).
+
+dot_node(Out, cn(Short,Title,Triples,Size,_LOG)):-
+	write_id(Out, Short),
+	format(atom(ToolTip), '~w (~D triples)', [Title,Triples]),
+	write_attributes([ height(Size),
+			   width(Size),
+			   tooltip(ToolTip)
+			 ], Out),
+	format(Out, ';~n', []).
+
+dot_link(Out, cl(S,T,Size)):-
+	format(atom(ToolTip), '~D links', [Size]),
+	write_id(Out, S), write(Out, ' -- '), write_id(Out, T),
+	write_attributes([edgetooltip(ToolTip)], Out),
+	format(Out, ';~n', []).
+
+% For each cloudnode, find the links to other cloudnodes. A link is
+% cl(Source, Target, Strength)
+%
+get_links(CNSizeList,Out,_Options):-
+	call_cleanup(( assert_graph_lookup(CNSizeList),
+		       loop_over_triples,
+		       findall(cl(A,B,C),linkcounter(_,A,B,C),Out)
+		     ),
+		     clean_links).
+
+assert_graph_lookup([]).
+assert_graph_lookup([cn(Short,_,_,_,LOG)|CNSizeList]):-
+	forall(member(Graph,LOG),assert(graphset(Graph,Short))),
+	assert_graph_lookup(CNSizeList).
+
+loop_over_triples:-
+	forall((rdf(S, _P, T),
+		rdf(S, rdf:type, _, GS:_),
+		rdf(T, rdf:type, _, GT:_),
+		GS \= GT,
+		graphset(GS,GSS),
+		graphset(GT,GTS),
+		GSS \= GTS
+	       ),
+	       update_counter(GSS, GTS)).
+
+update_counter(Short1,Short2):-
+	term_hash(Short1+Short2, Key),
+	(   retract(linkcounter(Key,Short1,Short2,N))
+	->  M is N+1
+	;   M = 1
+	),
+	assert(linkcounter(Key,Short1,Short2,M)).
+
+clean_links:-
+	retractall(linkcounter(_,_,_,_)),
+	retractall(graphset(_,_)).
+
+
+% For each cloudnode, retrieve the size of the dot node. The Max and Min
+% of a node size are given in the options through maxSize/1 and
+% minSize/1.
+get_sizes(CNList,CNSizeList,Options):-
+	option(max_size(MaxSize),Options, 5),
+	option(min_size(MinSize),Options, 0.5),
+	get_max_triples(CNList,MaxTriples),
+	SizeFactor = MaxTriples / MaxSize,
+
+	findall(cn(Short,Title,Triples,Size,LOG),
+		(member(cn(Short,Title,Triples,LOG),CNList),
+		 get_size(Triples, SizeFactor, MinSize, Size)),
+		CNSizeList).
+
+get_size(Triples, SizeFactor, MinSize, Size):-
+	FSizeF is Triples / SizeFactor,
+	FSize is integer(FSizeF*100)/100,
+	(   FSize >= MinSize
+	->  Size = FSize
+	;   Size = MinSize
+	).
+
+
+get_max_triples(CNList,MaxTriples):-
+	findall(X,member(cn(_,_,X,_LOG),CNList),TriplesList),
+	max_list(TriplesList,MaxTriples).
+
+get_all_cn(CNList):-
+	findall(cn(Short,Title,Triples,ListOfGraphs),
+		get_cloudnode(Short, Title, _Lib, ListOfGraphs, Triples),
+		CNList).
+
+
+% Make a graph of all the manifests that are loaded.
+make_manifests_graph:-
+	findall(Manifest,rdf_library_index(_ID,manifest(Manifest)),Manifests),
+	Manifests \== [], !,
+	sort(Manifests,SManifests),
+	forall(member(OneMani,SManifests),
+	       ( rdf_load(OneMani,[graph(ManifestGraph)]),
+		 assert(mg(ManifestGraph))
+	       )).
+make_manifests_graph :-
+	existence_error(graph, 'lib:CloudNode').
+
+clean_manifests_graph(Options) :-
+	option(unload_manifests(true), Options),
+	forall(retract(mg(MG)),
+	       rdf_unload(MG)).
+clean_manifests_graph(_).
+
+
+get_cloudnode(Short, Title, Lib, ListOfGraphs, Triples):-
+	rdf(Lib, rdf:type, lib:'CloudNode'),
+	rdf(Lib, dcterms:title, literal(TitleLit)),
+	plain_text(TitleLit, Title),
+	rdf_library_index(Short, source(Lib)),
+	rdf_library:load_commands(Short, [], Commands),
+	findall(G, member(_D-rdf_load(_File,[graph(G)]),Commands),ListOfGraphs),
+	get_notriples(ListOfGraphs, Triples),
+	debug(cloud, 'Cloud ~w (~w) ~D triples', [Short, Title, Triples]).
+
+plain_text(lang(_,Text), Text) :- !.
+plain_text(Text, Text).
+
+get_notriples(List, Size):-
+	findall(Size1,
+		(   member(G, List),
+		    rdf_graph_property(G, triples(Size1))
+		),
+		NumList),!,
+	sumlist(NumList, Size).
+
+
+		 /*******************************
+		 *	    GRAPHVIZ UTIL	*
+		 *******************************/
+
+%%	write_id(+Id) is det.
+%
+%	Write a graphviz ID
+
+write_id(Out, Id) :-
+	sub_atom(Id, _, 1, _, C),
+	\+ char_type(C, csym), !,
+	c_escape(Id, String),
+	format(Out, '"~s"', [String]).
+write_id(Out, Id) :-
+	write(Out, Id).
+
+%%	write_attributes(+Attributes:list, +Out:stream) is det.
+%
+%	Write attribute values.  We define some special attributes:
+%
+%		* html(HTML)
+%		Emit as label=<HTML>
+
+write_attributes([], Out) :- !,
+	format(Out, ' []').
+write_attributes(List, Out) :- !,
+	format(Out, ' [', []),
+	write_attributes_2(List, Out),
+	format(Out, ']', []).
+
+write_attributes_2([], _).
+write_attributes_2([H|T], Out) :-
+	(   string_attribute(H)
+	->  H =.. [Att, Value],
+	    c_escape(Value, String),
+	    format(Out, ' ~w="~s"', [Att, String])
+	;   html_attribute(H, Att)
+	->  arg(1, H, Value),
+	    format(Out, ' ~w=<~s>', [Att, Value])
+	;   H =.. [Name, Value],
+	    format(Out, ' ~w=~w', [Name, Value])
+	),
+	write_attributes_2(T, Out).
+
+
+string_attribute(label(_)).
+string_attribute(url(_)).
+string_attribute(href(_)).
+string_attribute('URL'(_)).
+string_attribute(fillcolor(_)).
+string_attribute(style(_)).
+string_attribute(tooltip(_)).
+string_attribute(edgetooltip(_)).
+
+html_attribute(html(_), label).
+
+
+c_escape(Atom, String) :-
+	atom_codes(Atom, Codes),
+	phrase(cstring(Codes), String).
+
+%%	cstring(+Codes)//
+%
+%	Create a C-string. Normally =dot=  appears   to  be  using UTF-8
+%	encoding. Would there be a  safer   way  to  transport non-ascii
+%	characters, such as \uXXXX?
+
+cstring([]) -->
+	[].
+cstring([H|T]) -->
+	(   cchar(H)
+	->  []
+	;   [H]
+	),
+	cstring(T).
+
+cchar(0'") --> "\\\"".
+cchar(0'\n) --> "\\n".
+cchar(0'\t) --> "\\t".
+cchar(0'\b) --> "\\b".
diff --git a/rdf/cpack/cloud.ttl b/rdf/cpack/cloud.ttl
index aa720e7..13f7d75 100644
--- a/rdf/cpack/cloud.ttl
+++ b/rdf/cpack/cloud.ttl
@@ -11,7 +11,7 @@
 # this.  Otherwise you can specify the information inline as done below.
 # See http://xmlns.com/foaf/spec/ for defines fields.
 
-<> a cpack:Package ;
+<> a cpack:Application ;
 	cpack:packageName "cloud" ;
 	dcterms:title "Visualize loaded datacloud" ;
 	cpack:author [ a foaf:Person ;
@@ -24,7 +24,11 @@
 	    ] ;
 	cpack:description
 
-"""Package description goes here.  You can use PlDoc markup.
+"""This package creates a datacload (see e.g., http://richard.cyganiak.de/2007/10/lod/)
+   dynamically based on the data loaded into the current ClioPatria image.  Nodes are
+   defined by the Manifest files as used by library(semweb/rdf_library).  Each entry
+   tagged using rdf:type as =|lib:CloudNode|= is added, where all triples from graphs
+   referenced from this entry are counted.
 """ .