amalgame/commit
FIXED: make compound label matcher behave like the other label modules
author | Jacco van Ossenbruggen |
---|---|
Mon Sep 8 21:57:58 2014 +0200 | |
committer | Jacco van Ossenbruggen |
Mon Sep 8 21:57:58 2014 +0200 | |
commit | 9e12d452509262d0c5761d3b6bf28f31e5478fa8 |
tree | 16b1ff08dc07fa57b78338afa55536e71fb83ae1 |
parent | 07e760bdbb3de93e5360c9a98ce5b02faee7dcec |
Diff style: patch stat
diff --git a/lib/ag_modules/compound_label_generator.pl b/lib/ag_modules/compound_label_generator.pl index 69672cf..767e549 100644 --- a/lib/ag_modules/compound_label_generator.pl +++ b/lib/ag_modules/compound_label_generator.pl @@ -1,7 +1,6 @@ :- module(compound_label_generator, []). -:- use_module(library(lists)). :- use_module(library(semweb/rdf_db)). :- use_module(library(amalgame/vocabulary)). :- use_module(string_match_util). @@ -38,8 +37,7 @@ parameter(match_qualified_only, boolean, false, % and Target. matcher(Source, Target, Mappings, Options) :- - findall(M, align(Source, Target, M, Options), ListOfLists), - append(ListOfLists, MappingsUnSorted), + findall(M, align(Source, Target, M, Options), MappingsUnSorted), sort(MappingsUnSorted, Mappings). align(Source, TargetScheme, MatchList, Options) :- diff --git a/lib/ag_modules/compound_label_match.pl b/lib/ag_modules/compound_label_match.pl index 4406f76..2c752ac 100644 --- a/lib/ag_modules/compound_label_match.pl +++ b/lib/ag_modules/compound_label_match.pl @@ -8,93 +8,72 @@ :- use_module(library(amalgame/vocabulary)). :- use_module(string_match_util). -compound_label_match(align(Source, _Target, Prov0), Results, Options) :- +%% compound_label_match(?In, -Out, +Options) is non det. +% +% In and Out are align(S,T,P) terms. +% Typically, there are to modes for calling this predicate: +% +% * Both S and T of In are instantiated, and the predicate is +% called to find a match between S and T, or +% * Only S is ground and the predicate is called to find a +% matching T. +compound_label_match(align(Source, Target, Prov0), + align(Source, Target, [Prov|Prov0]), Options) :- rdf_equal(rdfs:label, RdfsLabel), - option(sourcelabel(MatchProp1), Options, RdfsLabel), + option(sourcelabel(MatchPropS), Options, RdfsLabel), + option(targetlabel(MatchPropT), Options, RdfsLabel), + option(matchacross_lang(MatchAcross), Options, true), + option(matchacross_type(IgnoreType), Options, true), + option(case_sensitive(CaseSensitive), Options, false), option(source_language(Lang), Options, 'any'), - ( Lang == 'any' - -> SourceLang = _ + -> SourceLang = _UnBound ; SourceLang = Lang ), - Prov = [method(compound_label_match), - score(Score), - match(Match), - graph([rdf(Source, SourceProp, - literal(lang(SourceLang, SourceLabel)))]) - ], - - skos_match(Source, MatchProp1, - literal(lang(SourceLang, SourceLabel)), SourceProp, Options), - rdf_tokenize_literal(SourceLabel, Tokens), - findall(Targets-Token-LabelAmbScore, - ( member(Token, Tokens), - atom(Token), - match_label(Source, Token, Targets, - [source_lang(SourceLang)|Options]), - length(Targets, LabelAmbScore) - - ), - Targets), - length(Tokens, TokenLength), - length(Targets, NrMatched), - NrMatched > 0, - Match is NrMatched/TokenLength, - format(atom(Score), 'Matched ~w out of ~w parts', [NrMatched, TokenLength]), - create_results(Targets, Source, Prov0, Prov, Results). + % If we cannot match across languages, set target language to source language + ( MatchAcross == false + -> TargetLang = SourceLang + ; true + ), -create_results([], _, _, _, []). -create_results([Targets|Tail], Source, OldProv, MatchProv, Results):- - create_result_list(Targets, Source, OldProv, MatchProv, Results0), - create_results(Tail, Source, OldProv, MatchProv, Results1), - append(Results0, Results1, Results). + ( CaseSensitive + -> SearchTarget=literal(lang(TargetLang, Token)) + ; SearchTarget=literal(exact(Token), lang(TargetLang, TargetLabel)) + ), -create_result_list([]-_-_, _, _, _, []). -create_result_list([T-TProv|Tail]-L-Count, Source, OldProv, MatchProv, [A|Results]):- - create_result(T-L-TProv-Count, Source, OldProv, MatchProv, A), - create_result_list(Tail-L-Count, Source, OldProv, MatchProv, Results). + skos_match(Source, MatchPropS, + literal(lang(SourceLang, SourceLabel)), + SourceProp, Options), + SourceLabel \= '', + rdf_tokenize_literal(SourceLabel, Tokens), + length(Tokens, TokenLength), TokenLength > 0, + member(Token, Tokens), atom(Token), + skos_match(Target, MatchPropT, SearchTarget, TargetProp, Options), -create_result(Target-L-TargetProvGraph-Count, Source, OldProv, MatchProv, - align(Source, Target, [[token(L),token_ambiguity(Count),graph(ProvGraph)|Rest]|OldProv])) :- - select_option(graph(Graph), MatchProv, Rest), - append(Graph, TargetProvGraph, ProvGraph). + ( option(target_scheme(TargetScheme), Options) + -> vocab_member(Target, TargetScheme) + ; true + ), -match_label(Source, Label, Targets, Options) :- - rdf_equal(rdfs:label, RdfsLabel), - option(targetlabel(MatchProp), Options, RdfsLabel), - option(matchacross_lang(MatchAcross), Options, true), - option(matchacross_type(IgnoreType), Options, true), - option(case_sensitive(CaseSensitive), Options, false), - option(source_lang(SourceLang), Options), + ( IgnoreType + -> true + ; matching_types(Source, Target) + ), - ( CaseSensitive - -> SearchTarget=literal(lang(TargetLang, Label)) - ; SearchTarget=literal(exact(Label), lang(TargetLang, TargetLabel)) + % if matching label has no lang tag, these are still not grounded: + ( var(SourceLang) + -> SourceTerm = literal(SourceLabel) + ; SourceTerm = literal(lang(SourceLang, SourceLabel)) ), - % If we cannot match across languages, set target language to source language - ( MatchAcross == false - -> TargetLang = SourceLang - ; true + ( var(TargetLang) + -> TargetTerm = literal(TargetLabel) + ; TargetTerm = literal(lang(TargetLang, TargetLabel)) ), - findall(Target-ProvGraph, - ( skos_match(Target, MatchProp, SearchTarget, - TargetProp, Options), - ( option(target_scheme(TargetScheme), Options) - -> vocab_member(Target, TargetScheme) - ; true - ), - ( IgnoreType - -> true - ; matching_types(Source, Target) - ), - ( var(TargetLang) - -> TargetTerm = literal(TargetLabel) - ; TargetTerm = literal(lang(TargetLang, TargetLabel)) - ), - ProvGraph = [rdf(Target, TargetProp, TargetTerm)] - ), - Targets), - Targets \= []. + Prov = [method(compound_label), + score(nr_of_tokens(TokenLength)), + graph([rdf(Source, SourceProp, SourceTerm), + rdf(Target, TargetProp, TargetTerm)]) + ]. diff --git a/lib/ag_modules/compound_label_selecter.pl b/lib/ag_modules/compound_label_selecter.pl index 154d950..d4ce0f0 100644 --- a/lib/ag_modules/compound_label_selecter.pl +++ b/lib/ag_modules/compound_label_selecter.pl @@ -5,7 +5,6 @@ :- public selecter/5. :- public parameter/4. -:- use_module(library(lists)). :- use_module(library(semweb/rdf_db)). :- use_module(label_selecter). :- use_module(compound_label_match). @@ -38,5 +37,5 @@ parameter(match_qualified_only, boolean, false, amalgame_module(amalgame:'CompoundLabelSelecter'). selecter(In, Sel, Dis, Und, Options) :- - label_selecter(compound_label_match, In, SelList, Dis, Und, Options), - append(SelList, Sel). % HACK FIX ME + label_selecter(compound_label_match, In, Sel, Dis, Und, Options). +