rdf_qa/commit

first working version, detecting only cyclic properties in RDF

authorJacco van Ossenbruggen
Wed Jan 25 16:53:08 2012 +0100
committerJacco van Ossenbruggen
Wed Jan 25 16:53:08 2012 +0100
commitcd4d472d94c90748bb585fdc8557f8bfb46f1b4e
tree57d48bd34cd621aad17f7a6e612793b48391e585
parent488501574e73b6489e8be1350348ef107f3f3171
Diff style: patch stat
diff --git a/applications/rdf_qa.pl b/applications/rdf_qa.pl
new file mode 100644
index 0000000..652514c
--- /dev/null
+++ b/applications/rdf_qa.pl
@@ -0,0 +1,204 @@
+:- module(rdf_qa,
+	  [
+	  ]).
+
+
+:- use_module(library(semweb/rdf_db)).
+:- use_module(library(semweb/rdfs)).
+:- use_module(library(http/http_dispatch)).
+:- use_module(library(http/http_parameters)).
+:- use_module(library(http/html_write)).
+:- use_module(library('http/html_head')).
+:- use_module(library(url)).
+:- use_module(library(apply)).
+:- use_module(library(option)).
+:- use_module(library(settings)).
+
+:- use_module(library(count)).
+:- use_module(components(label)).
+:- use_module(components(qa_default_heuristics)).
+
+:- http_handler(cliopatria('qa_index'), rdf_qa_index, [content_type(text/html)]).
+:- http_handler(cliopatria('qa'),	rdf_qa,	   [content_type(text/html)]).
+
+rdf_qa_index(_Request) :-
+	findall(Class, clause(qa:rdf_warning(Class, _), _), Classes),
+	reply_html_page(cliopatria(main),
+			title('Generate RDF quality reports'),
+			body(\qa_index(Classes))
+		       ).
+
+%%	rdf_qa(Request)
+%
+%	Display quality issues with the loaded RDF.
+
+rdf_qa(Request) :-
+	http_parameters(Request,
+			[ max_per_ns(Max0, [integer, default(20)]),
+			  class(Class, [optional(true)]),
+			  ns(NS, [optional(true)]),
+			  show(Show, [oneof([local_view,uri]),
+				      default(local_view)])
+			]),
+	(   nonvar(NS)
+	->  Max = inf
+	;   Max = Max0
+	),
+	include(ground, [ns(NS), max_per_ns(Max), show(Show)], Options),
+	findall(Class, clause(qa:rdf_warning(Class, _), _), Classes),
+	warnings_by_class(Classes, ByCLass, Options),
+	reply_html_page(cliopatria(main),
+			title('RDF Quality report'),
+			body(\qa_report(ByCLass, Options))
+		       ).
+
+qa_index([]) --> [].
+qa_index([Class|T]) -->
+	{ answer_count(URI,qa:rdf_warning(Class, URI), 100, C),
+	  C > 0, !,
+	  http_location([ path(qa),
+			  search([ class=Class ])
+			], Location)
+	},
+	html_requires(qa),
+	html(div(class(qa_class_title), a(href(Location), [\qa:class_label(Class), \count(100, C)]))),
+	qa_index(T).
+qa_index([_|T]) -->
+	qa_index(T).
+
+count(C, C) -->
+	html(' (> ~D)'-[C]).
+count(_, C) -->
+	html(' (~D)'-[C]).
+
+qa_report([], _) --> !,
+        html(p('Could not find any problems in the RDF.')).
+qa_report(Classes, Options) -->
+        html_requires(qa),
+        html([
+               \report_by_class(Classes, Options)
+             ]).
+
+report_by_class([], _) -->
+        [].
+report_by_class([Class-Grouped|T], Options) -->
+        html([ h3(class(qa_class_heading),a(name(Class), \qa:class_label(Class))),
+               ul(\show_groups(Grouped, [class(Class)|Options]))
+             ]),
+        report_by_class(T, Options).
+
+show_groups([], _) -->
+        [].
+show_groups([NS-URIs|T], Options) -->
+        html(li(class(show_groups_li), [ \show_namespace(NS, Options),
+                  ol(\show_uris(URIs, [ns(NS)|Options]))
+                ])),
+        show_groups(T, Options).
+
+
+show_namespace(NS, Options) -->
+        { atom_concat('__file://', Path, NS), !,
+          option(class(Class), Options)
+        },
+        html([\qa:class_label(Class), ' for blank nodes from ', tt(Path)]).
+show_namespace(NS, Options) -->
+        { option(class(Class), Options)
+        },
+        html([\qa:class_label(Class), ' for namespace ', tt(NS)]).
+
+show_uris(URIs, Options) -->
+        { option(max_per_ns(Max), Options, 20),
+          option(show(Show), Options, local_view),
+          length(URIs, Len)
+        },
+        list_uris(URIs, Show),
+        (   {Max == inf ; Len < Max}
+        ->  []
+        ;   more_link(Options)
+        ).
+
+show_uri(H, uri) --> !,
+        html(li(class(show_uri), a(href(H), H))).
+show_uri(H, _) -->
+        html(li(class(show_uri_local), \rdf_link(H))).
+
+show_triple(rdf(S,P,O), _) -->
+        { rdf(S,P,O,DB) }, !,
+        html(li([ '{',
+                  \rdf_link(S),
+                  ', ',
+                  \rdf_link(P),
+                  ', ',
+                  \rdf_link(O),
+                  '}',
+                  \source(DB)
+                ])).
+
+more_link(Options) -->
+        { option(class(Class), Options),
+          option(ns(NS), Options),
+          http_location([ path(qa),
+                          search([ ns=NS, class=Class ])
+                        ], Location)
+        },
+        html(['... ', a(href(Location), 'show all')]).
+
+source(URI:Line) -->
+        { Line < 1e9, !,
+          file_base_name(URI, Base)
+        },
+        html([' from ', code(Base), ' at line ~D'-[Line]]).
+source(URI:Time) --> !,
+        { format_time(string(T), '%F:~R', Time) },
+        html([' by ', code(URI), ' at ', T]).
+source(X) -->                           % debugging
+        { term_to_atom(X, A) },
+        html(A).
+
+
+
+list_uris([], _) -->
+        [].
+list_uris([H|T], Show) -->
+        show_hit(H, Show),
+        list_uris(T, Show).
+
+show_hit(H, Show) -->
+        { atom(H) }, !,
+        show_uri(H, Show).
+show_hit(H, Show) -->
+        { H = rdf(_,_,_) }, !,
+        show_triple(H, Show).
+
+
+
+		 /*******************************
+		 *	     COLLECT		*
+		 *******************************/
+
+warnings_by_class([], [], _).
+warnings_by_class([H|T0], [H-Warnings|T], Options) :-
+	warnings_for_class(H, Warnings, Options),
+	Warnings \== [], !,
+	warnings_by_class(T0, T, Options).
+warnings_by_class([_|T0], T, Options) :-
+	warnings_by_class(T0, T, Options).
+
+warnings_for_class(Class, Grouped, Options) :-
+	option(max_per_ns(Max), Options, 20),
+	option(ns(NS), Options, _),
+	answer_pair_set(NS-URI, warning_by_ns(Class, NS, URI),
+			  inf, Max, Grouped).
+
+
+
+warning_by_ns(Warning, NS, URI) :-
+	qa:rdf_warning(Warning, URI),
+	namespace(URI, NS).
+
+namespace(rdf(URI, _, _), NS) :- !,
+	rdf_split_url(NS, _Id, URI).
+namespace(URI, NS) :- !,
+	rdf_split_url(NS, _Id, URI).
+
+
diff --git a/components/cycle_prop.pl b/components/cycle_prop.pl
new file mode 100644
index 0000000..f329c34
--- /dev/null
+++ b/components/cycle_prop.pl
@@ -0,0 +1,14 @@
+:- module(cycle_prop, []).
+
+:- use_module(qa_heuristics).
+:- use_module(library(semweb/rdf_db)).
+:- use_module(library(http/html_write)).
+
+
+qa:rdf_warning(cycle_property, rdf(S,P,S)) :-
+        rdf(S, P, S),
+        \+ (  rdf_equal(P, rdf:type),
+              rdf_equal(S, rdfs:'Class')
+	   ).
+qa:class_label(cycle_property) -->
+        html(['Triples with ', i('Subject == Object')]).
diff --git a/components/qa_default_heuristics.pl b/components/qa_default_heuristics.pl
new file mode 100644
index 0000000..48a9ebb
--- /dev/null
+++ b/components/qa_default_heuristics.pl
@@ -0,0 +1,3 @@
+:- module(defaults, []).
+
+:- use_module(cycle_prop).
diff --git a/components/qa_heuristics.pl b/components/qa_heuristics.pl
new file mode 100644
index 0000000..14b18cc
--- /dev/null
+++ b/components/qa_heuristics.pl
@@ -0,0 +1,12 @@
+:- module(qa_heuristics, [
+			  qa:rdf_warning/2,  % +Class, -URI
+			  qa:class_label//1 % +Class
+			 ]).
+
+:- multifile
+	qa:rdf_warning/2,
+	qa:class_label//1.
+
+:- rdf_meta
+	qa:rdf_warning(o,r).
+
diff --git a/config-available/rdf_qa.pl b/config-available/rdf_qa.pl
index 1e19a97..a157c0b 100644
--- a/config-available/rdf_qa.pl
+++ b/config-available/rdf_qa.pl
@@ -3,3 +3,4 @@
 /** <module> Heuristics for spotting problems in RDF data
 */
 
+:- use_module(applications(rdf_qa)).