
First version of the datacloud package

author Jan Wielemaker
Fri May 20 15:27:47 2011 +0200
committer Jan Wielemaker
Fri May 20 15:27:47 2011 +0200
+/*  Part of ClioPatria SeRQL and SPARQL server
+    Author:        Jan Wielemaker
+    E-mail:
+    WWW: 
+    Copyright (C): 2011, VU University Amsterdam
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License
+    as published by the Free Software Foundation; either version 2
+    of the License, or (at your option) any later version.
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    GNU General Public License for more details.
+    You should have received a copy of the GNU General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+    As a special exception, if you link this library with other files,
+    compiled with a Free Software compiler, to produce an executable, this
+    library does not by itself cause the resulting executable to be covered
+    by the GNU General Public License. This exception does not however
+    invalidate any other reasons why the executable file might be covered by
+    the GNU General Public License.
+:- module(app_cloud, []).
+:- use_module(library(datacloud)).
+:- use_module(library(settings)).
+:- use_module(library(process)).
+:- use_module(library(http/http_dispatch)).
+:- http_handler(root('datacloud.svg'), datacloud, [spawn,time_limit(1800)]).
+% Settings controlling how the datacloud image is produced and served.
+% cloud:renderer names the Graphviz executable and cloud:format is
+% passed to it as the -T<format> argument (see run_dot/2).
+:- setting(cloud:image_file, atom, 'datacloud.svg',
+	   'File for serving the datacloud').
+:- setting(cloud:format, atom, 'svg',
+	   'Graphviz output format used to render the datacloud').
+:- setting(cloud:renderer, oneof([dot,neato,fdp]), 'fdp',
+	   'Graphviz program used to render the datacloud').
+/** <module> Dynamically serve a datacloud
+%%	datacloud(+Request)
+%
+%	HTTP handler replying with the datacloud image. The image file is
+%	named by the setting cloud:image_file and is produced on demand
+%	by make_cloud/2, which runs under the mutex =cloud_server=.
+datacloud(Request) :-
+	setting(cloud:image_file, ImageFile),
+	with_mutex(cloud_server, make_cloud(ImageFile, [])),
+	http_reply_file(ImageFile, [], Request).
+%%	make_cloud(+File, +Options) is semidet.
+%
+%	Ensure File holds the rendered datacloud. If File is already
+%	readable it is reused as is. Otherwise the DOT source is written
+%	to a temporary file by write_cloud_graph/2 and rendered into File
+%	by run_dot/2. Options are passed on to write_cloud_graph/2.
+make_cloud(File, _) :-
+	access_file(File, read), !.
+make_cloud(File, Options) :-
+	setup_call_cleanup(
+	    tmp_file_stream(utf8, TmpFile, Tmp),
+	    (	% Close the stream before rendering so all DOT output is
+		% flushed; once/1 makes the close cleanup run right away.
+		call_cleanup(once(write_cloud_graph(Tmp, Options)),
+			     close(Tmp)),
+		run_dot(TmpFile, File)
+	    ),
+	    % Runs on success, failure and exception, so the temporary
+	    % file is also removed when writing the graph goes wrong.
+	    delete_file(TmpFile)).
+%%	run_dot(+In, +Out)
+%
+%	Run the Graphviz program selected by the setting cloud:renderer
+%	on the file In, writing the result to the file Out in the format
+%	given by the setting cloud:format.
+run_dot(In, Out) :-
+	setting(cloud:format, Format),
+	setting(cloud:renderer, Program),
+	atom_concat('-o', Out, OutputFlag),
+	atom_concat('-T', Format, FormatFlag),
+	process_create(path(Program),
+		       [FormatFlag, OutputFlag, In],
+		       []).
+:- module(datacloud,
+	  [ write_cloud_graph/2		% +Out, +Options
+	  ]).
+:- use_module(library(semweb/rdf_library)).
+:- use_module(library(semweb/rdf_db)).
+:- use_module(library(semweb/rdf_db)).
+:- use_module(library(debug)).
+% Per-thread working storage used while building the cloud:
+%   mg(Graph)                        graph a manifest was loaded into
+%   graphset(Graph, Short)           maps a graph to a Short identifier
+%                                    (presumably the cloud node's; the
+%                                    asserting clause head is not shown)
+%   linkcounter(Key, S1, S2, Count)  number of links counted from S1 to
+%                                    S2; Key is term_hash(S1+S2)
+:- thread_local
+	mg/1,
+	graphset/2,
+	linkcounter/4.
+/** <module> Create a data-cloud image
+Generates a datacloud based on the Manifest's  information in DOT, to be
+visualized in graphviz. A cloud  node  is   defined  as  an entry in the
+manifest files as used by   library(semweb/rdf_library)  that is labeled
+using the type =|lib:CloudNode|= and consists of all the graphs that are
+loaded (indirectly) through this entry.
+%% write_cloud_graph(+Out, +Options)
+%	Write a DOT graph for the loaded manifest files to the stream
+%	Out. Options include
+%	  * min_size(Float)
+%	  * max_size(Float)
+%	  Bounds for the node-sizes.  Defaults are 0.5 and 5.
+%	  * unload_manifests(Boolean)
+%	  If =true=, unload the Manifest files from the RDF store
+%	  after processing.
+write_cloud_graph(Out, Options):-
+	% Prepare the manifest data and collect the cloud nodes.
+	make_manifests_graph,
+	get_all_cn(CNList),
+	% Extend each cn/4 term to a cn/5 term that carries the node size.
+	get_sizes(CNList,CNSizeList,Options),
+	debug(cloud, 'Collecting links ...', []),
+	get_links(CNSizeList,Links,Options),
+	debug(cloud, 'Create graph ...', []),
+	% Emit an undirected DOT graph: header, nodes and edges, footer.
+	format(Out, 'graph DataCloud {~n', []),
+	make_dot(Out, CNSizeList,Links),
+	format(Out, '}~n', []),
+	% Only reached when everything above succeeded without error.
+	clean_manifests_graph(Options).
+%%	make_dot(+Out, +Nodes, +Edges)
+%
+%	Write one DOT statement per cn/5 node term, followed by one DOT
+%	statement per cl/3 link term, to the stream Out.
+make_dot(Out, Nodes, Edges) :-
+	maplist(dot_node(Out), Nodes),
+	maplist(dot_link(Out), Edges).
+%%	dot_node(+Out, +CloudNode)
+%
+%	Write the DOT node statement for a cn/5 term. The node is
+%	identified by its short name, uses Size for both height and
+%	width, and has a tooltip giving the title and triple count.
+dot_node(Out, cn(Id,Title,TripleCount,Size,_Graphs)) :-
+	write_id(Out, Id),
+	format(atom(Tip), '~w (~D triples)', [Title,TripleCount]),
+	Attributes = [ height(Size),
+		       width(Size),
+		       tooltip(Tip)
+		     ],
+	write_attributes(Attributes, Out),
+	format(Out, ';~n', []).
+%%	dot_link(+Out, +Link)
+%
+%	Write the undirected DOT edge statement for a cl(Source, Target,
+%	Count) term, with a tooltip stating the number of links.
+dot_link(Out, cl(Source,Target,Count)) :-
+	format(atom(Tip), '~D links', [Count]),
+	write_id(Out, Source),
+	format(Out, ' -- ', []),
+	write_id(Out, Target),
+	write_attributes([edgetooltip(Tip)], Out),
+	format(Out, ';~n', []).
+% For each cloudnode, find the links to other cloudnodes. A link is
+% cl(Source, Target, Strength)
+	call_cleanup(( assert_graph_lookup(CNSizeList),
+		       loop_over_triples,
+		       findall(cl(A,B,C),linkcounter(_,A,B,C),Out)
+		     ),
+		     clean_links).
+	forall(member(Graph,LOG),assert(graphset(Graph,Short))),
+	assert_graph_lookup(CNSizeList).
+	forall((rdf(S, _P, T),
+		rdf(S, rdf:type, _, GS:_),
+		rdf(T, rdf:type, _, GT:_),
+		GS \= GT,
+		graphset(GS,GSS),
+		graphset(GT,GTS),
+		GSS \= GTS
+	       ),
+	       update_counter(GSS, GTS)).
+	term_hash(Short1+Short2, Key),
+	(   retract(linkcounter(Key,Short1,Short2,N))
+	->  M is N+1
+	;   M = 1
+	),
+	assert(linkcounter(Key,Short1,Short2,M)).
+	retractall(linkcounter(_,_,_,_)),
+	retractall(graphset(_,_)).
+% For each cloudnode, compute the size of the dot node. The maximum and
+% minimum node sizes are given in the options through max_size/1 and
+% min_size/1.
+	option(max_size(MaxSize),Options, 5),
+	option(min_size(MinSize),Options, 0.5),
+	get_max_triples(CNList,MaxTriples),
+	SizeFactor = MaxTriples / MaxSize,
+	findall(cn(Short,Title,Triples,Size,LOG),
+		(member(cn(Short,Title,Triples,LOG),CNList),
+		 get_size(Triples, SizeFactor, MinSize, Size)),
+		CNSizeList).
+%%	get_size(+Triples, +SizeFactor, +MinSize, -Size)
+%
+%	Size is Triples/SizeFactor rounded to two decimals, but never
+%	smaller than MinSize. SizeFactor may be an arithmetic expression.
+get_size(Triples, SizeFactor, MinSize, Size) :-
+	Scaled is Triples / SizeFactor,
+	Rounded is integer(Scaled*100)/100,
+	(   Rounded < MinSize
+	->  Size = MinSize
+	;   Size = Rounded
+	).
+	findall(X,member(cn(_,_,X,_LOG),CNList),TriplesList),
+	max_list(TriplesList,MaxTriples).
+	findall(cn(Short,Title,Triples,ListOfGraphs),
+		get_cloudnode(Short, Title, _Lib, ListOfGraphs, Triples),
+		CNList).
+% Make a graph of all the manifests that are loaded.
+	findall(Manifest,rdf_library_index(_ID,manifest(Manifest)),Manifests),
+	Manifests \== [], !,
+	sort(Manifests,SManifests),
+	forall(member(OneMani,SManifests),
+	       ( rdf_load(OneMani,[graph(ManifestGraph)]),
+		 assert(mg(ManifestGraph))
+	       )).
+make_manifests_graph :-
+	existence_error(graph, 'lib:CloudNode').
+%%	clean_manifests_graph(+Options) is det.
+%
+%	If Options contains unload_manifests(true), unload every graph
+%	recorded in mg/1 and drop those records. Otherwise do nothing.
+%	The catch-all clause makes this succeed when the option is
+%	absent; without it write_cloud_graph/2 failed as its last step.
+clean_manifests_graph(Options) :-
+	option(unload_manifests(true), Options), !,
+	forall(retract(mg(MG)),
+	       rdf_unload(MG)).
+clean_manifests_graph(_).
+% get_cloudnode(-Short, -Title, -Lib, -ListOfGraphs, -Triples)
+%
+% Enumerate on backtracking the resources Lib typed lib:CloudNode.
+% Short is the rdf_library index identifier of Lib, Title its
+% dcterms:title as plain text, ListOfGraphs the graphs named by its load
+% commands and Triples the total triple count over those graphs.
+get_cloudnode(Short, Title, Lib, ListOfGraphs, Triples):-
+	rdf(Lib, rdf:type, lib:'CloudNode'),
+	rdf(Lib, dcterms:title, literal(TitleLit)),
+	plain_text(TitleLit, Title),
+	rdf_library_index(Short, source(Lib)),
+	% NOTE(review): load_commands/3 is not exported by rdf_library, so
+	% this depends on that library's internals.
+	rdf_library:load_commands(Short, [], Commands),
+	% Only rdf_load commands whose option list is exactly [graph(G)]
+	% contribute a graph.
+	findall(G, member(_D-rdf_load(_File,[graph(G)]),Commands),ListOfGraphs),
+	get_notriples(ListOfGraphs, Triples),
+	debug(cloud, 'Cloud ~w (~w) ~D triples', [Short, Title, Triples]).
+%%	plain_text(+Literal, -Text)
+%
+%	Text is the text of Literal: the second argument of a lang/2
+%	term, or Literal itself otherwise.
+plain_text(Literal, Text) :-
+	(   Literal = lang(_, Plain)
+	->  Text = Plain
+	;   Text = Literal
+	).
+%%	get_notriples(+Graphs, -Total)
+%
+%	Total is the sum of the triple counts of all graphs in Graphs.
+%	Graphs for which no triples/1 property is found contribute
+%	nothing.
+get_notriples(Graphs, Total) :-
+	findall(Count,
+		(   member(Graph, Graphs),
+		    rdf_graph_property(Graph, triples(Count))
+		),
+		Counts),
+	sum_list(Counts, Total).
+		 /*******************************
+		 *	    GRAPHVIZ UTIL	*
+		 *******************************/
+%%	write_id(+Out, +Id) is det.
+%
+%	Write Id to the stream Out as a graphviz ID. If Id contains a
+%	character that is not a C-symbol character it is written as a
+%	double-quoted, escaped string; otherwise it is written as is.
+write_id(Out, Id) :-
+	(   sub_atom(Id, _, 1, _, Char),
+	    \+ char_type(Char, csym)
+	->  c_escape(Id, Escaped),
+	    format(Out, '"~s"', [Escaped])
+	;   write(Out, Id)
+	).
+%%	write_attributes(+Attributes:list, +Out:stream) is det.
+%
+%	Write attribute values as a bracketed DOT attribute list; an
+%	empty list is written as =| []|=.  We define some special
+%	attributes:
+%
+%		* html(HTML)
+%		Emit as label=<HTML>
+write_attributes([], Out) :- !,
+	% Must be format/3: with two arguments Out would be taken as the
+	% format string instead of the destination stream.
+	format(Out, ' []', []).
+write_attributes(List, Out) :- !,
+	format(Out, ' [', []),
+	write_attributes_2(List, Out),
+	format(Out, ']', []).
+% write_attributes_2(+Attributes, +Out)
+%
+% Write each Name(Value) attribute to Out, preceded by a space, in one
+% of three forms chosen per attribute.
+write_attributes_2([], _).
+write_attributes_2([H|T], Out) :-
+	(   string_attribute(H)		% escaped and written as Name="Value"
+	->  H =.. [Att, Value],
+	    c_escape(Value, String),
+	    format(Out, ' ~w="~s"', [Att, String])
+	;   html_attribute(H, Att)	% written unescaped as Att=<Value>
+	->  arg(1, H, Value),
+	    format(Out, ' ~w=<~s>', [Att, Value])
+	;   H =.. [Name, Value],	% anything else: plain Name=Value
+	    format(Out, ' ~w=~w', [Name, Value])
+	),
+	write_attributes_2(T, Out).
+% html_attribute(?Term, ?Att): html(HTML) terms are emitted under the
+% DOT attribute name Att = label.
+html_attribute(html(_), label).
+%%	c_escape(+Atom, -String)
+%
+%	String is the list of character codes of Atom with the
+%	characters handled by cchar//1 replaced by escape sequences.
+c_escape(Atom, Escaped) :-
+	atom_codes(Atom, Raw),
+	phrase(cstring(Raw), Escaped).
+%%	cstring(+Codes)//
+%
+%	Emit Codes as the content of a C-style string: a code for which
+%	cchar//1 succeeds is replaced by its escape sequence, any other
+%	code is copied unchanged. Normally =dot= appears to be using
+%	UTF-8 encoding. Would there be a safer way to transport non-ascii
+%	characters, such as \uXXXX?
+cstring([]) -->
+	[].
+cstring([Code|Codes]) -->
+	(   cchar(Code)
+	->  []
+	;   [Code]
+	),
+	cstring(Codes).
+%%	cchar(+Code)//
+%
+%	Emit the escape sequence for Code; fails if Code needs no
+%	escaping. The backslash itself is escaped as well, so that a
+%	value ending in a backslash cannot escape the closing quote of
+%	the generated string.
+cchar(0'") --> "\\\"".
+cchar(0'\\) --> "\\\\".
+cchar(0'\n) --> "\\n".
+cchar(0'\t) --> "\\t".
+cchar(0'\b) --> "\\b".
 # this.  Otherwise you can specify the information inline as done below.
 # See for defines fields.
-<> a cpack:Package ;
+<> a cpack:Application ;
 	cpack:packageName "cloud" ;
 	dcterms:title "Visualize loaded datacloud" ;
 	cpack:author [ a foaf:Person ;
@@ -24,7 +24,11 @@
 	    ] ;
-"""Package description goes here.  You can use PlDoc markup.
+"""This package creates a datacloud (see e.g.,
+   dynamically based on the data loaded into the current ClioPatria image.  Nodes are
+   defined by the Manifest files as used by library(semweb/rdf_library).  Each entry
+   tagged using rdf:type as =|lib:CloudNode|= is added, where all triples from graphs
+   referenced from this entry are counted.
 """ .