isearch/commit
ADDED: Oren et.al facet sorting
author | Jan Wielemaker |
---|---|
Fri Dec 10 16:43:13 2010 +0100 | |
committer | Jan Wielemaker |
Fri Dec 10 16:43:13 2010 +0100 | |
commit | 86b8df8f5e92cb44580b03abcddd0092afea8a59 |
tree | 7dac72ce45c4fec0b6855e27323c04e992d29647 |
parent | 1218a4431ccd4252490f0acd2c6a32af804395f6 |
Diff style: patch stat
diff --git a/applications/isearch.pl b/applications/isearch.pl index 6d4d213..ecc51a6 100644 --- a/applications/isearch.pl +++ b/applications/isearch.pl @@ -169,8 +169,7 @@ isearch_page2(Options, Request) :- % limit by facet-value filter_results_by_facet(ResultsWithRelation, Filter, Results), - facets(Results, ResultsWithRelation, Filter, Facets0), - maplist(facet_merge_sameas, Facets0, Facets), + compute_facets(Results, ResultsWithRelation, Filter, Facets), length(ResultsWithRelation, NumberOfRelationResults), length(Results, NumberOfResults), @@ -187,6 +186,34 @@ isearch_page2(Options, Request) :- MatchingRelations, Facets, Options) ). +compute_facets(Results, AllResults, Filter, Facets) :- + facets(Results, AllResults, Filter, Facets0), + maplist(facet_merge_sameas, Facets0, Facets1), + length(AllResults, Total), + map_list_to_pairs(facet_quality(Total), Facets1, Keyed), + keysort(Keyed, Sorted), + pairs_values(Sorted, Facets). + +%% facet_quality(+Total, +Facet, -Quality) +% +% Rate the facet. We use 1/Q to avoid the need to reverse the +% search results. + +facet_quality(Total, Facet, Quality) :- + facet_balance(Facet, Balance), + facet_object_cardinality(Facet, Card), + facet_frequency(Facet, Total, Freq), + facet_weight(Facet, Weight), + Quality0 is Balance*Card*Freq*Weight, + ( debugging(facet) + -> Facet = facet(P, _, _), + rdf_display_label(P, Label), + debug(facet, '~p: ~w = ~w*~w*~w*~w~n', + [Label, Quality0, Balance, Card, Freq, Weight]) + ; true + ), + Quality is 1/(Quality0 + 0.00000000000001). + % conversion of json parameters. diff --git a/lib/search/facet.pl b/lib/search/facet.pl index b3b3976..5772fab 100644 --- a/lib/search/facet.pl +++ b/lib/search/facet.pl @@ -31,14 +31,20 @@ :- module(search_facet, [ facets/4, % +Results, +AllResults, +Filter, -Facets facet_merge_sameas/2, % +FacetIn, -FacetOut - facet_condition/3 % +Facets, ?Resource, -Goal + facet_condition/3, % +Facets, ?Resource, -Goal + facet_balance/2, % +Facet, -Balance + facet_object_cardinality/2, % +Facet, -Card + facet_frequency/3, % +Facet, +TotalCount, -Freq + facet_weight/2 % +Facet, -Weight ]). :- use_module(library(assoc)). :- use_module(library(lists)). :- use_module(library(pairs)). +:- use_module(library(apply)). :- use_module(library(semweb/owl_sameas)). :- use_module(library(semweb/rdf_label)). :- use_module(library(semweb/rdf_description)). +:- use_module(library(stat_lists)). /** <module> Computations for facetted search @@ -50,7 +56,7 @@ various operations on facets. A facet is represented as */ :- multifile - cliopatria:facet_exclude_property/1. % ?Resource + cliopatria:facet_weight/2. % ?Resource, ?Weight %% facets(+Results, +AllResults, +Filter, -Facets) @@ -191,27 +197,84 @@ pred_filter([Value|Vs], P, R, Goal) :- pred_filter(Vs, P, R, Rest). + /******************************* + * RANKING * + *******************************/ + +%% facet_balance(+Facet, -Balance) is det. +%% facet_object_cardinality(+Facet, -Card) is det. +%% facet_frequency(+Facet, +TotalResultCount, -Freq). +% +% Balance is a number 0..1 that expresses how wel the result-set +% is distributed over the different values for the facet property. +% +% Object cardinality prefers facets with a reasonable number of +% alternatives. Note that the reference below does *not* mention +% good values for the constants Mu and Sigma. +% +% Facet Frequency says something about the total number of results +% covered by the facet relative to the total (search) result. +% +% @see Eyal Oren, Renaud Delbru, Stefan Decker: Extending +% Faceted Navigation for RDF Data. International Semantic +% Web Conference 2006: 559-572 + +facet_balance(facet(_P, V_R, _Selected), Balance) :- + pairs_values(V_R, RLs), + maplist(length, RLs, Counts), + list_variance(Counts, Var), + Balance is 1 - (Var/(1+Var)). + +facet_object_cardinality(facet(_P, V_R, _Selected), Card) :- + Mu = 10, + Sigma = 40, + length(V_R, NoP), + ( NoP =< 1 + -> Card = 0 + ; Card is exp(-(((NoP-Mu)**2)/(2*Sigma**2))) + ). + +facet_frequency(facet(_P, V_R, _Selected), Total, Freq) :- + pairs_values(V_R, RLs), + append(RLs, AllResults), + sort(AllResults, Unique), + length(Unique, UniqueCount), + Freq is UniqueCount/Total. + +%% facet_weight(?P, ?Weight) +% +% User contributed value that assesses the usefullness of a facet. + :- rdf_meta - facet_exclude_property(r). + facet_weight(r, -). -%facet_exclude_property(rdf:type). -facet_exclude_property(P) :- +facet_weight(P, 0) :- label_property(P). -facet_exclude_property(P) :- +facet_weight(P, 0) :- description_property(P). -facet_exclude_property(dc:identifier). -facet_exclude_property(skos:notation). -facet_exclude_property(owl:sameAs). -facet_exclude_property(rdf:value). +facet_weight(dc:identifier, 0). +facet_weight(skos:notation, 0). +facet_weight(owl:sameAs, 0). +facet_weight(rdf:value, 0). +facet_weight(P, Weight) :- + ( cliopatria:facet_weight(P, Weight0) + -> Weight = Weight0 + ; Weight = 0.5 + ). + facet_exclude_property(P) :- - cliopatria:facet_exclude_property(P). + facet_weight(P, W), + W =:= 0. /******************************* * HOOKS * *******************************/ -%% cliopatria:facet_exclude_property(+Property) is semidet. +%% cliopatria:facet_weight(+Property, -Weight) is semidet. +% +% Expresses the usefullness of Property as a facet value. % -% True if Property must be excluded from creating a facet. +% @param Weight is a float between 0 and 1. 0 excludes the +% facet, while 1 makes the facet `ideal'.