tag_matcher/commit
add support for stemming
author | Michiel Hildebrand |
---|---|
Sat Jan 22 20:08:55 2011 +0100 | |
committer | Michiel Hildebrand |
Sat Jan 22 20:08:55 2011 +0100 | |
commit | 52c060dfaeacd7d8a3cd4577e26388e53ef7b1f3 |
tree | 3fde034faaf605f72d466331d9c8ee190c9f788e |
parent | 2dbeeb2fcc7ad73722c71cb9ae6e7591f18a66d8 |
Diff style: patch stat
diff --git a/api/tag_matcher.pl b/api/tag_matcher.pl index 95065a9..58d84a2 100644 --- a/api/tag_matcher.pl +++ b/api/tag_matcher.pl @@ -1,5 +1,6 @@ :- module(tag_matcher, - [ find_tag_concept/3 + [ tag_match/5, + find_tag_concept/3 ]). :- use_module(library(http/http_dispatch)). @@ -12,8 +13,10 @@ :- use_module(library(semweb/rdf_db)). :- use_module(library(semweb/rdfs)). :- use_module(library(semweb/rdf_litindex)). -:- use_module(library(find_resource)). +:- use_module(library(snowball)). + :- use_module(components(label)). +:- use_module(library(find_resource)). /** <module> Tag matcher @@ -44,10 +47,11 @@ http_tag_match_page(Request) :- t2(Tag2, [optional(true), description('another tag')]) ]), - ( nonvar(Tag1), - nonvar(Tag2) - -> tag_match(Tag1, Tag2, T1Data, T2Data, Type) - ; true + ( (var(Tag1); var(Tag2)) + -> true + ; tag_match(Tag1, Tag2, MatchType, T1Data, T2Data) + -> true + ; MatchType = false ), reply_html_page(cliopatria(default), [title('Tag matcher'), @@ -69,7 +73,7 @@ http_tag_match_page(Request) :- \html_match_examples ])), div(class('tag-match'), - \html_tag_match(Type)), + \html_tag_match(MatchType)), div(class('tag-data'), [ \html_tag_data(1, T1Data), \html_tag_data(2, T2Data) @@ -112,26 +116,35 @@ data_cell(R) --> html_tag_match(Type) --> { var(Type) }, !. -html_tag_match(@false) --> +html_tag_match(false) --> html(h3('no match found')). html_tag_match(Type) --> html(h3(['match: ', Type])). html_match_examples --> - html([\example_link(exact, 'obama', 'obama'), - ' | ', - \example_link(synonym, 'auto', 'wagen'), - ' | ', - \example_link(specific, labrador, hond), - ' | ', - \example_link(generic, paal, amsterdammertje) - ]). - -example_link(Name, Tag1, Tag2) --> + { findall(e(N,T1,T2), example(N,T1,T2), Es) + }, + html_example_links(Es). + +html_example_links([E]) --> + example_link(E). +html_example_links([E|Es]) --> + example_link(E), + html(' | '), + html_example_links(Es). + +example_link(e(Name, Tag1, Tag2)) --> { http_location_by_id(http_tag_match_page, Link) }, html(a(href(Link+'?t1='+Tag1+'&t2='+Tag2), Name)). +example(exact, verhagen, verhagen). +example(stemming, auto, autos). +example(synonym, auto, wagen). +example('synonym: word order', 'geert wilders', 'wilders, geert'). +example(specific, labrador, hond). +example(specific2, 'amsterdam-oost', amsterdam). +example(generic, paal, amsterdammertje). %% http_tag_match_api(+Request) % @@ -142,37 +155,51 @@ http_tag_match_api(Request) :- [ t1(Tag1, [description('a tag')]), t2(Tag2, [description('another tag')]) ]), - tag_match(Tag1, Tag2, T1Data, T2Data, Type), + ( tag_match(Tag1, Tag2, MatchType, T1Data, T2Data) + -> true + ; MatchType = @false, + T1Data = [], T2Data = [] + ), reply_json(json([t1=json([tag=Tag1|T1Data]), t2=json([tag=Tag2|T2Data]), - match=Type + match=MatchType ])). -tag_match(T, T, TData, _, exact) :- !, +%% tag_match(+Tag1, +Tag2, -Match, -Tag1Data, -Tag2Data) +% +% Succeeds if Tag1 and Tag2 match exactly or there is a relation +% between the concepts associcated with the Tags. TagData provides +% info about how the matching concepts are found. + +tag_match(T, T, exact, TData, _) :- !, ( find_tag_concept(T, _R, Hit) -> tag_data(Hit, TData) ; TData = [] ). -tag_match(T1, T2, TData, _, synonym) :- - find_tag_concept(T1, R, Hit), - find_tag_concept(T2, R, _Hit), - !, - tag_data(Hit, TData). -tag_match(T1, T2, T1Data, T2Data, specific) :- - find_tag_concept(T1, R1, Hit1), - find_tag_concept(T2, R2, Hit2), - ancestor_of(R1, R2), +tag_match(T1, T2, stem, TData, _) :- + snowball(dutch, T1, T), + snowball(dutch, T2, T), !, - tag_data(Hit1, T1Data), - tag_data(Hit2, T2Data). -tag_match(T1, T2, T1Data, T2Data, generic) :- + ( find_tag_concept(T, _R, Hit) + -> tag_data(Hit, TData) + ; TData = [] + ). +tag_match(T1, T2, Type, T1Data, T2Data) :- find_tag_concept(T1, R1, Hit1), find_tag_concept(T2, R2, Hit2), - ancestor_of(R2, R1), + tag_concept_match(R1, R2, Type), !, tag_data(Hit1, T1Data), tag_data(Hit2, T2Data). -tag_match(_, _, [], [], @false). + +tag_concept_match(R, R, synonym) :- !. +tag_concept_match(R1, R2, specific) :- + ancestor_of(R1, R2), + !. +tag_concept_match(R1, R2, generic) :- + ancestor_of(R2, R1), + !. + tag_data(hit(D,R,P,L), Data) :- Data = [uri=R, match=L, matchtype=P, distance=D, type=Type], @@ -191,13 +218,14 @@ find_tag_concept(Tag, R, Hit) :- PrefLabel-0, Label-1 ], - Options = [match(case), - distance(true), - attributes(LabelList) + Options = [match(stem), + attributes(LabelList), + distance(true) ], find_resource_by_name(Tag, Hits, Options), member(Hit, Hits), - Hit = hit(_,R,_,_). + Hit = hit(D,R,_,_), + D =< 3. %% tag_concept_type(+Concept, -Type) @@ -211,9 +239,9 @@ tag_concept_type(R, Type) :- tag_concept_type(_, unknown). scheme_type(gtaa:'Maker', person) :- !. -scheme_type(gtaa:'Namen', person):- !. -scheme_type(gtaa:'Persoonsnamen', person) :- !. -scheme_type(gtaa:'GeografischeNamen', location) :- !. +scheme_type(gtaa:'Name', person):- !. +scheme_type(gtaa:'Person', person) :- !. +scheme_type(gtaa:'Location', location) :- !. %% ancestor_of(+R, +Ancestor) %