cloud/commit
First version of the datacloud package
author | Jan Wielemaker |
---|---|
Fri May 20 15:27:47 2011 +0200 | |
committer | Jan Wielemaker |
Fri May 20 15:27:47 2011 +0200 | |
commit | 7e5d247da58fbb24af001dd91f6a642ab55e2f8d |
tree | 615245ea8eccc613e3a563b7df6b08fda2dec49c |
parent | 4ef743ac6045f4b96890b79381113451b9501ec7 |
Diff style: patch stat
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b25c15b --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*~ diff --git a/applications/app_cloud.pl b/applications/app_cloud.pl new file mode 100644 index 0000000..301b682 --- /dev/null +++ b/applications/app_cloud.pl @@ -0,0 +1,77 @@ +/* Part of ClioPatria SeRQL and SPARQL server + + Author: Jan Wielemaker + E-mail: J.Wielemaker@cs.vu.nl + WWW: http://www.swi-prolog.org + Copyright (C): 2011, VU University Amsterdam + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + As a special exception, if you link this library with other files, + compiled with a Free Software compiler, to produce an executable, this + library does not by itself cause the resulting executable to be covered + by the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. +*/ + + +:- module(app_cloud, []). +:- use_module(library(datacloud)). +:- use_module(library(settings)). +:- use_module(library(process)). +:- use_module(library(http/http_dispatch)). + +:- http_handler(root('datacloud.svg'), datacloud, [spawn,time_limit(1800)]). + +:- setting(cloud:image_file, atom, 'datacloud.svg', + 'File for serving the datacloud'). +:- setting(cloud:format, atom, 'svg', + 'File for serving the datacloud'). +:- setting(cloud:renderer, oneof([dot,neato,fdp]), 'fdp', + 'File for serving the datacloud'). + +/** <module> Dynamically serve a datacloud +*/ + +%% datacloud(+Request) +% +% Serve an SVG holding the datacloud + +datacloud(Request) :- + setting(cloud:image_file, File), + with_mutex(cloud_server, + make_cloud(File, [])), + http_reply_file(File, [], Request). + + +make_cloud(File, _) :- + access_file(File, read), !. +make_cloud(File, Options) :- + setup_call_cleanup(tmp_file_stream(utf8, TmpFile, Tmp), + write_cloud_graph(Tmp, Options), + close(Tmp)), + call_cleanup(run_dot(TmpFile,File), + delete_file(TmpFile)). + +run_dot(In, Out) :- + setting(cloud:renderer, Dot), + setting(cloud:format, Format), + atom_concat('-T', Format, FmtArg), + atom_concat('-o', Out, OutArg), + process_create(path(Dot), + [FmtArg, OutArg, In], + [ + ]). diff --git a/config-available/cloud.pl b/config-available/cloud.pl index f446c16..a1680ba 100644 --- a/config-available/cloud.pl +++ b/config-available/cloud.pl @@ -3,3 +3,4 @@ /** <module> Visualize loaded datacloud */ +:- use_module(application(app_cloud)). diff --git a/lib/datacloud.pl b/lib/datacloud.pl new file mode 100644 index 0000000..18ba6e5 --- /dev/null +++ b/lib/datacloud.pl @@ -0,0 +1,285 @@ +/* Part of ClioPatria SeRQL and SPARQL server + + Author: Victor de Boer + WWW: http://www.few.vu.nl/~vbr240/ + Copyright (C): VU University Amsterdam + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + As a special exception, if you link this library with other files, + compiled with a Free Software compiler, to produce an executable, this + library does not by itself cause the resulting executable to be covered + by the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. +*/ + +:- module(datacloud, + [ write_cloud_graph/2 % +Out, +Options + ]). +:- use_module(library(semweb/rdf_library)). +:- use_module(library(semweb/rdf_db)). +:- use_module(library(semweb/rdf_db)). +:- use_module(library(debug)). + +:- thread_local + mg/1, + graphset/2, + linkcounter/4. + +/** <module> Create a data-cloud image + +Generates a datacloud based on the Manifest's information in DOT, to be +visualized in graphviz. A cloud node is defined as an entry in the +manifest files as used by library(semweb/rdf_library) that is labeled +using the type =|lib:CloudNode|= and consists of all the graphs that are +loaded (indirectly) through this entry. +*/ + +%% make_cloud_graph(+Out, +Options) +% +% Generates a dot atom for loaded manifest files. Options include +% +% * min_size(Float) +% * max_size(Float) +% Bounds for the node-sizes. Defaults are 0.5 and 5. +% * unload_manifests(Boolean) +% If =true=, unload the Manifest files from the RDF store +% after processing. + +write_cloud_graph(Out, Options):- + make_manifests_graph, + get_all_cn(CNList), + get_sizes(CNList,CNSizeList,Options), + debug(cloud, 'Collecting links ...', []), + get_links(CNSizeList,Links,Options), + debug(cloud, 'Create graph ...', []), + format(Out, 'graph DataCloud {~n', []), + make_dot(Out, CNSizeList,Links), + format(Out, '}~n', []), + clean_manifests_graph(Options). + +make_dot(Out, CNSizeList,Links):- + maplist(dot_node(Out), CNSizeList), + maplist(dot_link(Out), Links). + +dot_node(Out, cn(Short,Title,Triples,Size,_LOG)):- + write_id(Out, Short), + format(atom(ToolTip), '~w (~D triples)', [Title,Triples]), + write_attributes([ height(Size), + width(Size), + tooltip(ToolTip) + ], Out), + format(Out, ';~n', []). + +dot_link(Out, cl(S,T,Size)):- + format(atom(ToolTip), '~D links', [Size]), + write_id(Out, S), write(Out, ' -- '), write_id(Out, T), + write_attributes([edgetooltip(ToolTip)], Out), + format(Out, ';~n', []). + +% For each cloudnode, find the links to other cloudnodes. A link is +% cl(Source, Target, Strength) +% +get_links(CNSizeList,Out,_Options):- + call_cleanup(( assert_graph_lookup(CNSizeList), + loop_over_triples, + findall(cl(A,B,C),linkcounter(_,A,B,C),Out) + ), + clean_links). + +assert_graph_lookup([]). +assert_graph_lookup([cn(Short,_,_,_,LOG)|CNSizeList]):- + forall(member(Graph,LOG),assert(graphset(Graph,Short))), + assert_graph_lookup(CNSizeList). + +loop_over_triples:- + forall((rdf(S, _P, T), + rdf(S, rdf:type, _, GS:_), + rdf(T, rdf:type, _, GT:_), + GS \= GT, + graphset(GS,GSS), + graphset(GT,GTS), + GSS \= GTS + ), + update_counter(GSS, GTS)). + +update_counter(Short1,Short2):- + term_hash(Short1+Short2, Key), + ( retract(linkcounter(Key,Short1,Short2,N)) + -> M is N+1 + ; M = 1 + ), + assert(linkcounter(Key,Short1,Short2,M)). + +clean_links:- + retractall(linkcounter(_,_,_,_)), + retractall(graphset(_,_)). + + +% For each cloudnode, retrieve the size of the dot node. The Max and Min +% of a node size are given in the options through maxSize/1 and +% minSize/1. +get_sizes(CNList,CNSizeList,Options):- + option(max_size(MaxSize),Options, 5), + option(min_size(MinSize),Options, 0.5), + get_max_triples(CNList,MaxTriples), + SizeFactor = MaxTriples / MaxSize, + + findall(cn(Short,Title,Triples,Size,LOG), + (member(cn(Short,Title,Triples,LOG),CNList), + get_size(Triples, SizeFactor, MinSize, Size)), + CNSizeList). + +get_size(Triples, SizeFactor, MinSize, Size):- + FSizeF is Triples / SizeFactor, + FSize is integer(FSizeF*100)/100, + ( FSize >= MinSize + -> Size = FSize + ; Size = MinSize + ). + + +get_max_triples(CNList,MaxTriples):- + findall(X,member(cn(_,_,X,_LOG),CNList),TriplesList), + max_list(TriplesList,MaxTriples). + +get_all_cn(CNList):- + findall(cn(Short,Title,Triples,ListOfGraphs), + get_cloudnode(Short, Title, _Lib, ListOfGraphs, Triples), + CNList). + + +% Make a graph of all the manifests that are loaded. +make_manifests_graph:- + findall(Manifest,rdf_library_index(_ID,manifest(Manifest)),Manifests), + Manifests \== [], !, + sort(Manifests,SManifests), + forall(member(OneMani,SManifests), + ( rdf_load(OneMani,[graph(ManifestGraph)]), + assert(mg(ManifestGraph)) + )). +make_manifests_graph :- + existence_error(graph, 'lib:CloudNode'). + +clean_manifests_graph(Options) :- + option(unload_manifests(true), Options), + forall(retract(mg(MG)), + rdf_unload(MG)). +clean_manifests_graph(_). + + +get_cloudnode(Short, Title, Lib, ListOfGraphs, Triples):- + rdf(Lib, rdf:type, lib:'CloudNode'), + rdf(Lib, dcterms:title, literal(TitleLit)), + plain_text(TitleLit, Title), + rdf_library_index(Short, source(Lib)), + rdf_library:load_commands(Short, [], Commands), + findall(G, member(_D-rdf_load(_File,[graph(G)]),Commands),ListOfGraphs), + get_notriples(ListOfGraphs, Triples), + debug(cloud, 'Cloud ~w (~w) ~D triples', [Short, Title, Triples]). + +plain_text(lang(_,Text), Text) :- !. +plain_text(Text, Text). + +get_notriples(List, Size):- + findall(Size1, + ( member(G, List), + rdf_graph_property(G, triples(Size1)) + ), + NumList),!, + sumlist(NumList, Size). + + + /******************************* + * GRAPHVIZ UTIL * + *******************************/ + +%% write_id(+Id) is det. +% +% Write a graphviz ID + +write_id(Out, Id) :- + sub_atom(Id, _, 1, _, C), + \+ char_type(C, csym), !, + c_escape(Id, String), + format(Out, '"~s"', [String]). +write_id(Out, Id) :- + write(Out, Id). + +%% write_attributes(+Attributes:list, +Out:stream) is det. +% +% Write attribute values. We define some special attributes: +% +% * html(HTML) +% Emit as label=<HTML> + +write_attributes([], Out) :- !, + format(Out, ' []'). +write_attributes(List, Out) :- !, + format(Out, ' [', []), + write_attributes_2(List, Out), + format(Out, ']', []). + +write_attributes_2([], _). +write_attributes_2([H|T], Out) :- + ( string_attribute(H) + -> H =.. [Att, Value], + c_escape(Value, String), + format(Out, ' ~w="~s"', [Att, String]) + ; html_attribute(H, Att) + -> arg(1, H, Value), + format(Out, ' ~w=<~s>', [Att, Value]) + ; H =.. [Name, Value], + format(Out, ' ~w=~w', [Name, Value]) + ), + write_attributes_2(T, Out). + + +string_attribute(label(_)). +string_attribute(url(_)). +string_attribute(href(_)). +string_attribute('URL'(_)). +string_attribute(fillcolor(_)). +string_attribute(style(_)). +string_attribute(tooltip(_)). +string_attribute(edgetooltip(_)). + +html_attribute(html(_), label). + + +c_escape(Atom, String) :- + atom_codes(Atom, Codes), + phrase(cstring(Codes), String). + +%% cstring(+Codes)// +% +% Create a C-string. Normally =dot= appears to be using UTF-8 +% encoding. Would there be a safer way to transport non-ascii +% characters, such as \uXXXX? + +cstring([]) --> + []. +cstring([H|T]) --> + ( cchar(H) + -> [] + ; [H] + ), + cstring(T). + +cchar(0'") --> "\\\"". +cchar(0'\n) --> "\\n". +cchar(0'\t) --> "\\t". +cchar(0'\b) --> "\\b". diff --git a/rdf/cpack/cloud.ttl b/rdf/cpack/cloud.ttl index aa720e7..13f7d75 100644 --- a/rdf/cpack/cloud.ttl +++ b/rdf/cpack/cloud.ttl @@ -11,7 +11,7 @@ # this. Otherwise you can specify the information inline as done below. # See http://xmlns.com/foaf/spec/ for defines fields. -<> a cpack:Package ; +<> a cpack:Application ; cpack:packageName "cloud" ; dcterms:title "Visualize loaded datacloud" ; cpack:author [ a foaf:Person ; @@ -24,7 +24,11 @@ ] ; cpack:description -"""Package description goes here. You can use PlDoc markup. +"""This package creates a datacload (see e.g., http://richard.cyganiak.de/2007/10/lod/) + dynamically based on the data loaded into the current ClioPatria image. Nodes are + defined by the Manifest files as used by library(semweb/rdf_library). Each entry + tagged using rdf:type as =|lib:CloudNode|= is added, where all triples from graphs + referenced from this entry are counted. """ .