cloud/commit

Do not allow bnodes to link cloud-nodes and provide datacloud_link/4 to make debugging wrong relations easier.

author Jan Wielemaker
Mon Oct 10 10:38:20 2011 +0200
committer Jan Wielemaker
Mon Oct 10 10:38:20 2011 +0200
commit 7177ae053ad9fe3123cb8806b9a655b0097443e2
tree ea0eb601ca7c59fcf3c85f7c87a8d4b652e4cae4
parent 9808ea27931d0fb899bf2e1415b74e28f9bb38a8
Diff style: patch stat
diff --git a/lib/datacloud.pl b/lib/datacloud.pl
index 6a4b9bc..4a211c8 100644
--- a/lib/datacloud.pl
+++ b/lib/datacloud.pl
@@ -27,7 +27,8 @@
 */
 
 :- module(datacloud,
-	  [ write_cloud_graph/2		% +Out, +Options
+	  [ write_cloud_graph/2,	% +Out, +Options
+	    datacloud_link/4		% ?SourceSet, ?TargetSet, -Triple, +Opts
 	  ]).
 :- use_module(library(semweb/rdf_library)).
 :- use_module(library(semweb/rdf_db)).
@@ -106,16 +107,31 @@ assert_graph_lookup([cn(Short,_,_,_,LOG)|CNSizeList]):-
 	assert_graph_lookup(CNSizeList).
 
 loop_over_triples:-
-	forall((rdf(S, _P, T),
-		rdf(S, rdf:type, _, GS:_),
-		rdf(T, rdf:type, _, GT:_),
-		GS \= GT,
-		graphset(GS,GSS),
-		graphset(GT,GTS),
-		GSS \= GTS
-	       ),
+	forall(cloud_link(GSS, GTS, _),
 	       update_counter(GSS, GTS)).
 
+%%	cloud_link(?SourceSet, ?TargetSet, -Triple) is nondet.
+%
+%	True if Triple relates  SourceSet   to  TargetSet. Defining what
+%	constitutes a link is  far  from   trivial.  Currently  this  is
+%	defined as a link  between  two   resources  that  are defined in
+%	different graphsets, where `defined in' implies  that there is a
+%	type property registered in the graphset. Also, bnodes can never
+%	link two graphs because  you  cannot   point  to  them  from the
+%	outside.
+
+cloud_link(GSS, GTS, rdf(S,P,T)) :-
+	rdf(S, P, T), atom(T),
+	\+ rdf_is_bnode(S),
+	\+ rdf_is_bnode(T),
+	rdf(S, rdf:type, _, GS:_),
+	rdf(T, rdf:type, _, GT:_),
+	GS \== GT,
+	graphset(GS,GSS),
+	graphset(GT,GTS),
+	GSS \== GTS.
+
+
 update_counter(Short1,Short2):-
 	term_hash(Short1+Short2, Key),
 	(   retract(linkcounter(Key,Short1,Short2,N))
@@ -213,6 +229,25 @@ get_notriples(List, Size):-
 	sumlist(NumList, Size).
 
 
+		 /*******************************
+		 *	      DEBUG		*
+		 *******************************/
+
+%%	datacloud_link(?SourceSet, ?TargetSet, -Triple, +Options) is nondet.
+%
+%	True if Triple relates SourceSet to TargetSet.
+
+datacloud_link(GSS, GTS, Triple, Options) :-
+	make_manifests_graph,
+	get_all_cn(CNList),
+	get_sizes(CNList, CNSizeList, Options),
+	setup_call_cleanup(
+	    assert_graph_lookup(CNSizeList),
+	    cloud_link(GSS, GTS, Triple),
+	    retractall(graphset(_,_))).
+
+
+
 		 /*******************************
 		 *	    GRAPHVIZ UTIL	*
 		 *******************************/