xmlrdf/commit
ADDED: AHM example conversion
author | Jan Wielemaker |
---|---|
Mon Nov 29 12:49:03 2010 +0100 | |
committer | Jan Wielemaker |
Mon Nov 29 12:49:03 2010 +0100 | |
commit | 085a55ecc0837a18d9e8598c1046e85f4569f5b6 |
tree | 160f4440eb09c1ee10916652dfc76391456c327b |
parent | 486260d33cf5f74d3a664c4e8c052d2311b0479e |
Diff style: patch stat
diff --git a/examples/AHM/.pl-history b/examples/AHM/.pl-history new file mode 100644 index 0000000..42a6d80 --- /dev/null +++ b/examples/AHM/.pl-history @@ -0,0 +1,161 @@ +statistics. +run. +run. +server(5000). +edit(http_browse). +edit(http_browse_rdf). +make. +edit(http_browse_rdf). +run. +server. +server(5000). +edit. +make. +mzke. +rewrite. +clean. +edit. +run. +aatnet. +rewrite. +make. +clean. +run. +rewrite. +edit. +run. +aatned. +rewrite. +server(5000). +edit. +save. +save_persons. +threads. +edit(rewrite). +make. +edit. +run. +aatned. +rewrite. +rdf(S, ahm:identifier, O). +server(5000). +make. +clean. +run. +rewrite. +save. +save_persons. +make. +clean. +edit(clean). +make. +clean. +run. +aatned. +rewrite. +server. +server(5000). +edit(rewrite). +clean. +run. +rewrit. +server(5000). +rdf_list_rule(creator). +make. +rdf_list_rule(creator). +rdf_list_rule(creator). +run. +aatned. +rewrite. +server(5000). +save. +save_persons. +run. +server(5000). +edit(run). +run. +rewrite. +server. +server(5000). +run. +server(5000). +ls. +emacs. +make. +[diag]. +bad(R). +findall(R, bad(R), L), length(L, N). +rdf('__bnode100071', piref, X). +rdf('__bnode100071', ahm:piref, X). +rdf('__bnode100071', ahm:priref, X). +run. +rewrite. +cp_server. +load. +edit. +edit(/). +edit(browse). +make. +edit(rewrite). +make. +rewrite(text_clean). +edit. +clean. +sample. +make. +sample. +edit(run). +clean. +sample. +rewrite(text_clean). +make. +rewrite. +make. +rewrite. +make. +rewrite. +make. +rewrite. +sample. +server. +server(5000). +edit. +cp_server. +cp_server. +sample. +edit(rewrite). +tmon. +threads. +edit(cliopatria). +edit(preferences). +edit(count). +ls. +edit. +ls. +ls. +clean. +edit(server). +edit(cp_server). +make. +threads. +cp_server. +number_chars(X, '345'). +edit. +load. +load_people_bob. +rewrite. +ls. +server. +save. +edit(html_write). +make. +ls. +clean. +d.. . +ls. +edit. +run. +edit(run). +edit(run/0). +rewrite,save. +run. 
diff --git a/examples/AHM/Notes.txt b/examples/AHM/Notes.txt new file mode 100644 index 0000000..66ff3a6 --- /dev/null +++ b/examples/AHM/Notes.txt @@ -0,0 +1,102 @@ +TODO: +- People thesaurus: use property (Bob) +----------------------------------------------------- + + + + + + + +- For the schema file, I have used the old ahm-schema.ttl file, where vra is replaced by dcterms where possible. Here we use the standard vra-dcterms mapping (http://www.vraweb.org/resources/datastandards/vracore3/categories.html#type) + +- URI's are based on ahm:objectnumber as they seem unique (could also have chosen priref) + +- acquisitionDate and Method both mapped to dct:provenance + +- locat is mapped to dcterms:spatial, which does not really capture the essence + +- ahm:reproduction is a view on the data, which might or might not be available on the web. I think it could be an hasView attached to the aggregation + +- productionPlace: dct does not have a good property, so dct:descr. + +- hasView: is it only for web-dereferencable material? + +- What to do with all the typed bnodes? + +RULES + +- titleTranslation is always the en. translation of the dutch title. As such it is replaced by a dct:title @en. The original Dutch title becomes a ahm:title @nl. This is slightly weird. + +- For dimensions, we have bnodes with (almost) always a value. + +% For makers, we use the person thesaurus. TODO: make the object point +% to the person URI if it is in the thesaurus, or to a BNode object if +% it is not (or add it to the thesaurus). Problematic are two +% Maker properties that are context dependent, the role and the +% qualifier. there are instances where both are present. A possible +% solution is to make a subproperty for each qualifier/role combi. O + +- Exhibitions could be modeled as events, using the EDM event model and stored in a (separate) thesaurus. For now, we use bnodes and 'treat them as resources'. + +- related Object ->reBN'ify? 
: er zijn meerdere records, waarbij +meerdere relatedObjectReference en relatedObjectNotes zijn. Hiervan is +de structuur niet meer te achterhalen. In the original data there is no +XML grouping of related_object.references and related_object.notes, so +we do not reconstruct that. + + + +NAMES/People + +in people.xml: +- 66937 records +- all ahm:name values are unique. + +in collection-11001-12001.xml: +- 1000 Proxies +- 988 Proxies with ahm:maker (??) +- 988 ahm:maker bnodes with an ahm:creator value +- "onbekend" 435, "onbekend (glasblazer)" 305, "onbekend (glasgraveur)" 90, "onbekend (glasschilder)" 16 +- 142 other +- 85 distinct +- all 988 creator fields have a unique match in people.xml + +in total 6172 object literals that match one or more people record names +- 25 different properties (ref1) +- also locations etc. + + +Association Person: +in collection-11001-12001.xml: 195 entries, all with unique match in person thesaurus. +- associationPerson, contentPerson, documentationAuthor, exhibitionOrganiser, reproductionCreators mapped to people name thesaurus +- contentSubject and associationSubject not mapped to people: only few hits, clearly not people (paard, libelle..) + +Alternative Number +- Again, I use the bnode reification trick, where the alternativeNumber becomes subproperty of rdfs:label of the bnode + + + + +*----------- Mapping to thesaurus. + +I have looked at which literals appear as pref/altlabel values +in the converted thesaurus. There are a large number of properties +(ref2). +For now, each property gives rise to one rewrite rule. 
+ + +(ref1): [reproductionCreator,creator,documentationAuthor,exhibitionOrganiser,exhibitionVenue,currentLocation,creatorDateOfDeath,AHMTextsAuthor,documentationTitle,documentationSortyear,alternativeNumberType,exhibitionCatalogueNumber,alternativeNumberInstitution,dimensionValue,creatorDateOfBirth,creditLine,productionPlace,contentMotifGeneral,acquisitionMethod,associationPerson,associationSubject,contentSubject,contentPersonName,acquisitionDate,material] + +(ref2): List of metadata properties, whose object literals are the same +as preflabels of skos concepts. (starred ones are not rewritten) + +[acquisitionMethod],[alternativeNumber],[alternativeNumberInstitution],[associationSubject],[contentMotifGeneral],[contentSubject],[creditLine],[currentLocation],[currentLocationFitness],[dimensionNotes],[dimensionPart],[dimensionType],[dimensionUnit],[dimensionValue*],[documentationSortyear*],[documentationTitle],[exhibitionCatalogueNumber*],[exhibitionVenue],[material],[objectCategory],[objectName],[productionDateEnd*],[productionDateStart*],[productionPeriod],[productionPlace],[reproductionFormat*],[reproductionType],[technique],[title] + + + + + + + + diff --git a/examples/AHM/TODO b/examples/AHM/TODO new file mode 100644 index 0000000..583611b --- /dev/null +++ b/examples/AHM/TODO @@ -0,0 +1 @@ +Images: http://ahm.adlibsoft.com/AHMimages/... diff --git a/examples/AHM/diag.pl b/examples/AHM/diag.pl new file mode 100644 index 0000000..d8a3687 --- /dev/null +++ b/examples/AHM/diag.pl @@ -0,0 +1,11 @@ +num_creators(R, N) :- + bagof(C, rdf(R, ahm:creator, C), Cs), + length(Cs, N). +num_date_of_birth(R, N) :- + bagof(C, rdf(R, ahm:creatorDateOfBirth, C), Cs), + length(Cs, N). + +bad(R) :- + num_creators(R, N1), + num_date_of_birth(R, N2), + N1 \== N2. 
diff --git a/examples/AHM/rewrite.pl b/examples/AHM/rewrite.pl new file mode 100644 index 0000000..2d2b1e2 --- /dev/null +++ b/examples/AHM/rewrite.pl @@ -0,0 +1,658 @@ +:- module(ahm_rewrite, + [ rewrite/0, + rewrite/1, + rewrite/2, + list_rules/0 + ]). +:- use_module(library(semweb/rdf_db)). +:- use_module(xmlrdf(rdf_convert_util)). +:- use_module(xmlrdf(cvt_vocabulary)). +:- use_module(xmlrdf(rdf_rewrite)). +:- use_module(util). + +:- debug(rdf_rewrite). + +%% rewrite +% +% Apply all rules on the graph =data= + +rewrite :- + rdf_rewrite(data). + +%% rewrite(+Rule) +% +% Apply the given rule on the graph =data= + +rewrite(Rule) :- + rdf_rewrite(data, Rule). + +%% rewrite(+Graph, +Rule) +% +% Apply the given rule on the given graph. + +rewrite(Graph, Rule) :- + rdf_rewrite(Graph, Rule). + +%% list_rules +% +% List the available rules to the console. + +list_rules :- + rdf_rewrite_rules. + +:- discontiguous + rdf_mapping_rule/5. + +% URI's are based on ahm:objectnumber as they seem unique \ +%(could also have chosen ahm:priref). Here we only make the +% proxy, the rest of the EDM triangle is made later + +assign_uris @@ +{ S, ahm:objectNumber, ObjectNumber } \ +{ S } <=> + literal_to_id(['proxy-', ObjectNumber], ahm, URI), + { URI }. + + +% Title is always in Dutch, for a few objects, a translation into +% English is available, which maps to @en +title_nl @@ +{ S, ahm:title, TitleNL } +<=> +{ S, ahm:title, TitleNL@nl}. + +title_en @@ +{ S, ahm:titleTranslation, TitleEN } +<=> +{ S, ahm:title, TitleEN@en}. + + +%------------ CREATOR RULES ------------ + +% For makers, we use the person thesaurus. TODO: make the object point +% to the person URI if it is in the thesaurus, or to a BNode object if +% it is not (or add it to the thesaurus). +% +% Problematic are two Maker properties that are context dependent, the +% role and the qualifier. there are instances where both are present. 
+% The solution we use is for the roles to use subproperties and for the +% qualifiers to keep the bnode, with rdf:value=thesaurus uri. Another +% possible solution is to make a subproperty for each qualifier/role +% combi. + + +% Find uri's for creators. This version checks against existence of a uri +% in the people thesaurus (currently, this looks in all loaded rdf, no +% graph is given. But we do check for the rdf type). Make sure that this +% is up to date with the people conversion: the literal to id scheme +% should be the same. We assume here that the data instances correspond +% exactly to the thesaurus concepts. +% NOTE: we here use the bnode-reification trick, with rdf:value because +% we have a uri + +% CreatorRoles. Here we used to do the same as in bibliopolis: we make +% different subproperties for different roles. If there is no specified +% role (""), we keep ahm:maker. This uses a util predicate. NOTE: +% CHANGED THIS, no longer uses sub-property-role +% +/* OLD RULES +creator_hackrule @@ % This rule to not mess up the ahm:maker +{S, ahm:maker, M} +<=> +{S, ahm:makerorig, M}. + +creator_to_uris @@ +{ S, ahm:makerorig, B}, +{ B, ahm:creator, Name}, +{ B, ahm:creatorRole, literal(Role)} + +<=> + rdf_is_bnode(B), + role_to_property(Role, Property), + {S, Property, B}, + rdf(PersonURI, ahm:name, Name), + rdf(PersonURI, rdf:type, ahm:'Person'), + {B, rdf:value, PersonURI}. +*/ + +creator_to_uris @@ +{ _, ahm:maker, B}\ +{ B, ahm:creator, Name} +<=> + rdf_is_bnode(B), + rdf(PersonURI, ahm:name, Name), + rdf(PersonURI, rdf:type, ahm:'Person'), + {B, rdf:value, PersonURI}. + +% creator_clean: delete the creatorDateOfBirth / creatorDateOfDeath +% triples whose subject is a blank node (the guard is rdf_is_bnode/1). +% Presumably these dates belong to the person record rather than to +% the maker bnode -- TODO confirm. +% A third creator_clean rule repeated the creatorDateOfDeath pattern +% verbatim; it could never fire after the rule before it and was dropped. +creator_clean @@ +{M, ahm:creatorDateOfBirth, _} +<=> +rdf_is_bnode(M), +true. + +creator_clean @@ +{M, ahm:creatorDateOfDeath, _} +<=> +rdf_is_bnode(M), +true. 
+ + + +% Same for associated persons: replace the name by the matching ahm:'Person' uri +association_person @@ +{S, ahm:associationPerson, AP} +<=> +rdf(PersonURI, ahm:name, AP), +rdf(PersonURI, rdf:type, ahm:'Person'), +{S, ahm:associationPerson, PersonURI}. + +% Same for content persons +content_person @@ +{S, ahm:contentPersonName, CP} +<=> +rdf(PersonURI, ahm:name, CP), +rdf(PersonURI, rdf:type, ahm:'Person'), +{S, ahm:contentPersonName, PersonURI}. + +% Same for documentationAuthors +documentation_author @@ +{S, ahm:documentationAuthor, DA} +<=> +rdf(PersonURI, ahm:name, DA), +rdf(PersonURI, rdf:type, ahm:'Person'), +{S, ahm:documentationAuthor, PersonURI}. + +% Same for reproductionCreators +reproduction_creator @@ +{S, ahm:reproductionCreator, RC} +<=> +rdf(PersonURI, ahm:name, RC), +rdf(PersonURI, rdf:type, ahm:'Person'), +{S, ahm:reproductionCreator, PersonURI}. + +% Same for exhibitionOrganiser +exhibition_organiser @@ +{S, ahm:exhibitionOrganiser, RC} +<=> +rdf(PersonURI, ahm:name, RC), +rdf(PersonURI, rdf:type, ahm:'Person'), +{S, ahm:exhibitionOrganiser, PersonURI}. + + + +% For dimensions, we concatenate all available information into a single +% literal value, which becomes the rdfs:label(!) of the bnode. The +% individual properties are retained. + +dimensions @@ +{ _S, ahm:dimension, B}, +{ B, ahm:dimensionValue, literal(Val)}, +{ B, ahm:dimensionUnit, literal(Unit)}?, +{ B, ahm:dimensionType, literal(Type)}?, +{ B, ahm:dimensionPrecision, literal(Prec)}?, +{ B, ahm:dimensionPart, literal(Part)}?, +{ B, ahm:dimensionNotes, literal(Notes)}? + +==> +rdf_is_bnode(B), + concat_maybe([Type,Val,Unit,Prec,Part,Notes], ConcatVal), + {B, rdfs:label, literal(ConcatVal)}. + + +% For documentations, we do the same thing: gather all literals, +% concatenate the bound variables and use that as rdfs:label of the +% bnode. 
+ +documentation @@ +{ _S, ahm:documentation, B}, +{ B, ahm:documentationAuthor, literal(Author)}?, +{ B, ahm:documentationPageReference, literal(PageRef)}?, +{ B, ahm:documentationShelfMark, literal(ShelfMark)}?, +{ B, ahm:documentationSortYear, literal(SortYear)}?, +{ B, ahm:documentationTitle, literal(Title)}?, +{ B, ahm:documentationTitleArticle, literal(TitleArticle)}?, +{ B, ahm:documentationLRef, literal(LRef)}? +==> +rdf_is_bnode(B), + concat_maybe([Author, TitleArticle, Title, PageRef, SortYear, ShelfMark, LRef], ConcatVal), + {B, rdfs:label, literal(ConcatVal)}. + + +% For Exhibitions, a better way would be to model them as events using +% the EDM classes and properties available and the info should be stored +% in a (separate) thesaurus. For now, we use bnodes and 'treat them as +% resources', rdfs:label is a maybeconcat of the available info. Also, +% ahm:exhibition appears twice, once with domain=resource and once with +% domain=Exhibition. We rename the latter. + +exhibitions @@ +{S, ahm:exhibition, B}, +{ B, ahm:exhibitionCatalogueNumber, literal(CatNum)}?, +{ B, ahm:exhibitionCode, literal(Code)}?, +{ B, ahm:exhibitionDateEnd , literal(DateEnd)}?, +{ B, ahm:exhibitionDateStart, literal(DateStart)}?, +{ B, ahm:exhibitionLref, literal(Lref)}?, +{ B, ahm:exhibitionObjectLocation , literal(ObjLoc)}?, +{ B, ahm:exhibitionOrganiser , literal(Organiser)}?, +{ B, ahm:exhibitionVenue , literal(Venue)}?, +{S, ahm:exhibition, B} \ %cant combine ? and \, so this is a hack + +{ B, ahm:exhibition, literal(ExName)} +<=> +rdf_is_bnode(B), + concat_maybe([ExName, Venue, DateStart, DateEnd, Organiser, Code, Lref, CatNum, ObjLoc], ConcatVal), + {B, ahm:exhibitionTitle, literal(ExName)}, + {B, rdfs:label, literal(ConcatVal)}. + + +% For Locations, we concatenate a term using all descriptions. 
Not sure +% if these should actually be thesaurus concepts +% NOTE(review): this rule reuses the name `exhibitions` although it handles ahm:locat bnodes; possibly a copy-paste, confirm before renaming. +exhibitions @@ +{_S, ahm:locat, B}, +{B, ahm:currentLocation, literal(Loc)}?, +{B, ahm:currentLocationDateEnd, literal(LocDE)}?, +{B, ahm:currentLocationDateStart, literal(LocDS)}?, +{B, ahm:currentLocationFitness, literal(LocFit)}?, +{B, ahm:currentLocationLref, literal(LocLref)}?, +{B, ahm:currentLocationNotes, literal(LocNotes)}?, +{B, ahm:currentLocationType, literal(LocType)}? +==> +rdf_is_bnode(B), + concat_maybe([Loc, LocDS, LocDE, LocFit, LocLref, LocNotes, LocType], ConcatVal), + {B, rdfs:label, literal(ConcatVal)}. + + + +% Related Object Reference, convert from literal to object (proxy) +% uri. Note that in the original data there is no XML grouping of +% related_object.references and related_object.notes, so we do not +% reconstruct that. + +related_object_reference @@ +{S, ahm:relatedObjectReference, literal(ObjRef)} +<=> +literal_to_id(['proxy-', ObjRef],ahm, ObjURI), + {S,ahm:relatedObjectReference, ObjURI}. + +% can be removed if already the title of the thing itself +related_object_title @@ +{S, ahm:relatedObjectReference, O}, + {O, ahm:title, T}\ + {S, ahm:relatedObjectTitle, T} + <=> + true. + +% ----------- PART OF ---------------- +% +% Replace literal with object uri (think about partsTitle?) + +partofref @@ +{Obj, ahm:objectNumber, ON}\ +{S, ahm:partOfReference, ON} +<=> +{S, ahm:partOfReference, Obj}. + +partofref @@ +{Obj, ahm:objectNumber, ON}\ +{S, ahm:partsReference, ON} +<=> +{S, ahm:partsReference, Obj}. + + + +% RULES BELOW ARE BY JAN +% +% All text classification seems tripled, with lang=neutral, lang=0 and +% lang=1. This rule deletes all the ones except for the neutral one. + + +clean_empty @@ +{ _, _, "" } <=> true. + +clean_selected @@ +{ _, ahm:selected, "False" } <=> true. 
+ +text_clean @@ +{ N, ahm:value, TextType }, +{ TextType, rdf:value, Value} \ +{ TextType, ahm:lang, "neutral"}, +{ N, ahm:value, TextType2 }, +{ TextType2, rdf:value, Value}, +{ TextType2, _, _} + <=> TextType \== TextType2. + +% webtextlang: flatten the typed AHMTextsType bnode chain into a plain type +% literal and move the ENG/NL suffix onto the text as an @en/@nl language tag. +webtextlang @@ +{T, ahm:'AHMTextsTekst', Text}, +{T, ahm:'AHMTextsType', BN1}, +{BN1, rdf:type, ahm:'AHMTextsType'}, +{BN1, ahm:value, BN}, +{BN, rdf:value, "webtekst ENG"}, +{BN, _, _} + <=> + {T, ahm:'AHMTextsType', "webtekst"}, + {T, ahm:'AHMTextsTekst', Text@en}. +webtextlang @@ +{T, ahm:'AHMTextsTekst', Text}, +{T, ahm:'AHMTextsType', BN1}, +{BN1, rdf:type, ahm:'AHMTextsType'}, +{BN1, ahm:value, BN}, +{BN, rdf:value, "webtekst NL"}, +{BN, _, _} + <=> + {T, ahm:'AHMTextsType', "webtekst"}, + {T, ahm:'AHMTextsTekst', Text@nl}. +% English room text: tagged @en (was @nl, a copy-paste slip from the NL rule). +webtextlang @@ +{T, ahm:'AHMTextsTekst', Text}, +{T, ahm:'AHMTextsType', BN1}, +{BN1, rdf:type, ahm:'AHMTextsType'}, +{BN1, ahm:value, BN}, +{BN, rdf:value, "zaaltekst ENG"}, +{BN, _, _} + <=> + {T, ahm:'AHMTextsType', "zaaltekst"}, + {T, ahm:'AHMTextsTekst', Text@en}. +webtextlang @@ +{T, ahm:'AHMTextsTekst', Text}, +{T, ahm:'AHMTextsType', BN1}, +{BN1, rdf:type, ahm:'AHMTextsType'}, +{BN1, ahm:value, BN}, +{BN, rdf:value, "zaaltekst NL"}, +{BN, _, _} + <=> + {T, ahm:'AHMTextsType', "zaaltekst"}, + {T, ahm:'AHMTextsTekst', Text@nl}. + +% Added by Victor: takes care of the category +webtextcat @@ +{T, ahm:'AHMTextsType', BN1}, +{BN1, rdf:type, ahm:'AHMTextsType'}, +{BN1, ahm:value, BN}, +{BN, rdf:value, literal(Lit)}, +{BN, _, _} + <=> + {T, ahm:'AHMTextsType', literal(Lit)}. + + +% clean +webtextcat @@ +{T, ahm:'AHMTextsType', BN1}, +{BN1, rdf:type, ahm:'AHMTextsType'} +<=> +true. +% clean +webtextcat @@ +{_, ahm:value, _} +<=> +true. +webtextcat @@ +{_, rdf:type, ahm:'Value'} +<=> +true. 
+ + + /******************************* + * MAP TO THESAURUS * + *******************************/ + +/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +The following rules replace a literal value by the thesaurus skos +concept for a given set of properties. Each property uses a separate +rule. */ + +labelPred(skos:prefLabel). +labelPred(skos:altLabel). + +map_to_thesaurus @@ +{C, rdf:type, skos:'Concept'}, + {C, P, literal(Lit)}\ + {S, ahm:acquisitionMethod, literal(Lit)} + <=> + labelPred(P), + {S, ahm:acquisitionMethod, C}. + +map_to_thesaurus @@ +{C, rdf:type, skos:'Concept'}, + {C, P, literal(Lit)}\ + {S, ahm:alternativeNumberInstitution, literal(Lit)} + <=> + labelPred(P), + {S, ahm:alternativeNumberInstitution, C}. +map_to_thesaurus @@ +{C, rdf:type, skos:'Concept'}, + {C, P, literal(Lit)}\ + {S, ahm:associationSubject, literal(Lit)} + <=> + labelPred(P), + {S, ahm:associationSubject, C}. + +map_to_thesaurus @@ +{C, rdf:type, skos:'Concept'}, + {C, P, literal(Lit)}\ + {S, ahm:contentMotifGeneral, literal(Lit)} + <=> + labelPred(P), + {S, ahm:contentMotifGeneral, C}. + +map_to_thesaurus @@ +{C, rdf:type, skos:'Concept'}, + {C, P, literal(Lit)}\ + {S, ahm:contentSubject, literal(Lit)} + <=> + labelPred(P), + {S, ahm:contentSubject, C}. + +%clean +map_to_thesaurus @@ + {P, rdf:type, ahm:'ContentSubject'}, + {_, ahm:contentSubject,P} + <=> + true. + + +% maybe not this one (is always AHM +map_to_thesaurus @@ +{C, rdf:type, skos:'Concept'}, + {C, P, literal(Lit)}\ + {S, ahm:creditLine, literal(Lit)} + <=> + labelPred(P), + {S, ahm:creditLine, C}. + +map_to_thesaurus @@ +{C, rdf:type, skos:'Concept'}, + {C, P, literal(Lit)}\ + {S, ahm:currentLocation, literal(Lit)} + <=> + labelPred(P), + {S, ahm:currentLocation, C}. + +map_to_thesaurus @@ +{C, rdf:type, skos:'Concept'}, + {C, P, literal(Lit)}\ + {S, ahm:currentLocationFitness, literal(Lit)} + <=> + labelPred(P), + {S, ahm:currentLocationFitness, C}. 
+ +map_to_thesaurus @@ +{C, rdf:type, skos:'Concept'}, + {C, P, literal(Lit)}\ + {S, ahm:dimensionNotes, literal(Lit)} + <=> + labelPred(P), + {S, ahm:dimensionNotes, C}. + +map_to_thesaurus @@ +{C, rdf:type, skos:'Concept'}, + {C, P, literal(Lit)}\ + {S, ahm:dimensionPart, literal(Lit)} + <=> + labelPred(P), + {S, ahm:dimensionPart, C}. + + +map_to_thesaurus @@ +{C, rdf:type, skos:'Concept'}, + {C, P, literal(Lit)}\ + {S, ahm:dimensionType, literal(Lit)} + <=> + labelPred(P), + {S, ahm:dimensionType, C}. + +map_to_thesaurus @@ +{C, rdf:type, skos:'Concept'}, + {C, P, literal(Lit)}\ + {S, ahm:dimensionUnit, literal(Lit)} + <=> + labelPred(P), + {S, ahm:dimensionUnit, C}. + +map_to_thesaurus @@ +{C, rdf:type, skos:'Concept'}, + {C, P, literal(Lit)}\ + {S, ahm:documentationTitle, literal(Lit)} + <=> + labelPred(P), + {S, ahm:documentationTitle, C}. + +map_to_thesaurus @@ +{C, rdf:type, skos:'Concept'}, + {C, P, literal(Lit)}\ + {S, ahm:exhibitionVenue, literal(Lit)} + <=> + labelPred(P), + {S, ahm:exhibitionVenue, C}. + +map_to_thesaurus @@ +{C, rdf:type, skos:'Concept'}, + {C, P, literal(Lit)}\ + {S, ahm:material, literal(Lit)} + <=> + labelPred(P), + {S, ahm:material, C}. + +map_to_thesaurus @@ +{C, rdf:type, skos:'Concept'}, + {C, P, literal(Lit)}\ + {S, ahm:objectCategory, literal(Lit)} + <=> + labelPred(P), + {S, ahm:objectCategory, C}. + +map_to_thesaurus @@ +{C, rdf:type, skos:'Concept'}, + {C, P, literal(Lit)}\ + {S, ahm:objectName, literal(Lit)} + <=> + labelPred(P), + {S, ahm:objectName, C}. + +map_to_thesaurus @@ +{C, rdf:type, skos:'Concept'}, + {C, P, literal(Lit)}\ + {S, ahm:productionPeriod, literal(Lit)} + <=> + labelPred(P), + {S, ahm:productionPeriod, C}. +%clean +map_to_thesaurus @@ + {P, rdf:type, ahm:'ProductionPlace'}, + {_, ahm:productionPlace,P} + <=> + true. + +map_to_thesaurus @@ +{C, rdf:type, skos:'Concept'}, + {C, P, literal(Lit)}\ + {S, ahm:productionPlace, literal(Lit)} + <=> + labelPred(P), + {S, ahm:productionPlace, C}. 
+ +map_to_thesaurus @@ +{C, rdf:type, skos:'Concept'}, + {C, P, literal(Lit)}\ + {S, ahm:reproductionType, literal(Lit)} + <=> + labelPred(P), + {S, ahm:reproductionType, C}. + +map_to_thesaurus @@ +{C, rdf:type, skos:'Concept'}, + {C, P, literal(Lit)}\ + {S, ahm:technique, literal(Lit)} + <=> + labelPred(P), + {S, ahm:technique, C}. + +map_to_thesaurus @@ +{C, rdf:type, skos:'Concept'}, + {C, P, literal(Lit)}\ + {S, ahm:collection, literal(Lit)} + <=> + labelPred(P), + {S, ahm:collection, C}. + + + /******************************* + * EDM * + *******************************/ + +/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Given a nice DC model, we now change this into an EDM model. To do so, +we must create three resources: + + 1. the aggregation + 2. the physical object + 3. a proxy with the description + +The proxy is where the description lives, so we use the initial record +with DC attributes for that. +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ + +:- rdf_register_ns(ens, 'http://www.europeana.eu/schemas/edm/'). +:- rdf_register_ns(irw, 'http://ontologydesignpatterns.org/ont/web/irw.owl#'). +:- rdf_register_ns(ore, 'http://www.openarchives.org/ore/terms/'). + +edm @@ +{S, rdf:type, ahm:'Record'}, +{S, ahm:reproduction, Reproduction } ? % reproduction becomes a hasView + <=> + edm_identifier(S, proxy, aggregation, AggURI), + edm_identifier(S, proxy, physical, PhysURI), + { S, rdf:type, ore:'Proxy'}, + { AggURI, rdf:type, ore:'Aggregation' }, + { PhysURI, rdf:type, ens:'PhysicalThing' }, + { S, ore:proxyIn, AggURI }, + { S, ore:proxyFor, PhysURI }, + { AggURI, ore:aggregates, PhysURI }, + { AggURI, ens:aggregatedCHO, PhysURI }, + { AggURI, ens:hasView, Reproduction }. 
+ + +% ------------- Thumbnails ------------- +% This guesses the thumbnail uri, based on the object number +% +get_thumbnails @@ +{S, ahm:objectNumber, literal(ON)} +==> + edm_identifier(S, proxy, aggregation, AggURI), + object_number_to_url(ON, URL), + { AggURI, ens:hasThumbnail, URL }, % create new property with link to the ens:WebResource + { URL, rdf:type, ens:'WebResource' }. + + + + + + diff --git a/examples/AHM/rewrite_thes.pl b/examples/AHM/rewrite_thes.pl new file mode 100644 index 0000000..877cb85 --- /dev/null +++ b/examples/AHM/rewrite_thes.pl @@ -0,0 +1,241 @@ +:- module(ahm_rewrite_thes, + [ rewrite/0, + rewrite/1, + rewrite/2, + list_rules/0 + ]). +:- use_module(library(semweb/rdf_db)). +:- use_module(xmlrdf(rdf_convert_util)). +:- use_module(xmlrdf(cvt_vocabulary)). +:- use_module(xmlrdf(rdf_rewrite)). +:- use_module(util). + +:- debug(rdf_rewrite). + +%% rewrite +% +% Apply all rules on the graph =thesaurus= + +rewrite :- + rdf_rewrite(thesaurus). + +%% rewrite(+Rule) +% +% Apply the given rule on the graph =thesaurus= + +rewrite(Rule) :- + rdf_rewrite(thesaurus, Rule). + +%% rewrite(+Graph, +Rule) +% +% Apply the given rule on the given graph. + +rewrite(Graph, Rule) :- + rdf_rewrite(Graph, Rule). + +%% list_rules +% +% List the available rules to the console. + +list_rules :- + rdf_rewrite_rules. + +:- discontiguous + rdf_mapping_rule/5. + + + + + +record_to_concepts @@ +{S, rdf:type, ahm:'Record'} +<=> +{S, rdf:type, skos:'Concept'}. + + + + +% ------------ SKOS hierarchy +related properties------ +% +% Terms are unique, so we can find the correct concept through this property + +narrower @@ +{S, ahm:narrowerTerm, NarTerm} +<=> +rdf(NarUri, ahm:term, NarTerm), + {S, skos:narrower, NarUri}. + +% if it doesnt exist, create (or refer to) a new skos:concept +narrower @@ +{S, ahm:narrowerTerm, NarTerm} +<=> +literal_to_id(['t00',NarTerm],ahm, NarUri), + {NarUri, rdf:type, skos:'Concept'}, + {NarUri, skos:prefLabel, NarTerm}, + {S, skos:narrower, NarUri}. 
+ + +% same for broader +broader @@ +{S, ahm:broaderTerm, BroadTerm} +<=> +rdf(BroadUri, ahm:term, BroadTerm), + {S, skos:broader, BroadUri}. + +% if it doesnt exist, create a skos:concept +broader @@ +{S, ahm:broaderTerm, BroTerm} +<=> +literal_to_id(['t00',BroTerm],ahm, BroUri), + {BroUri, rdf:type, skos:'Concept'}, + {BroUri, skos:prefLabel, BroTerm}, + {S, skos:broader, BroUri}. + + +% same for related +related @@ +{S, ahm:relatedTerm,RelTerm} +<=> +rdf(RelUri, ahm:term, RelTerm), + {S, skos:related, RelUri}. + +% if it doesnt exist, create a skos:concept +related @@ +{S, ahm:relatedTerm,RelTerm} +<=> +literal_to_id(['t00',RelTerm],ahm, RelUri), + {RelUri, rdf:type, skos:'Concept'}, + {RelUri, skos:prefLabel, RelTerm}, + {S, skos:related, RelUri}. + + + +% ------------ equivalent term -------- +% +% get the correct uri. +% TODO: is this skos:exactmatch or owl:sameas? + +equivalent_term @@ +{S, ahm:equivalentTerm, EqTerm} +<=> +rdf(EqUri, ahm:term, EqTerm), + {S, skos:exactMatch, EqUri}. + + +%------------- TERMTYPES ------------ + +% termTypes are mapped to scopeNotes in the schema file. +% language: we have four values, [neutral, 0,1,2,3], corresponding to +% "neutral"/ EN/ NL/ DL. The first one is used as the rdf:label of the +% bnode, the other ones are values, which are subproperties of +% rdf:label + +langvals @@ +{S, ahm:value, Val}, + {Val, ahm:lang, literal('neutral')}, + {Val, rdf:value, RVAL} +<=> + {S, ahm:value, RVAL}. + +langvals @@ +{S, ahm:value, Val}, + {Val, ahm:lang, literal(Lang)}, + {Val, rdf:value, literal(RVal)} +<=> +lang_to_langcode(Lang,LangCode), + {S, ahm:value, literal(lang(LangCode, RVal))}. + +langvals @@ +{_S, ahm:value, Val} +<=> +rdf_is_bnode(Val),true. + +langvals @@ +{_, ahm:lang, _} +<=> +true. + +% Give the Term type bnodes a uri +termtypes_to_uris @@ +{_S, ahm:termType, TT}, +{TT, ahm:value, VAL}\ +{TT} +<=> +rdf_is_bnode(TT), +not(VAL = literal(lang(_,_))), + literal_to_id(['tt00',VAL],ahm,TypeUri), + {TypeUri}. 
+ +termtypes_to_uris @@ +{_S, ahm:termType, literal('')} +<=> +true. +% ----------- USE/USEFOR --------- + +% use becomes altlabel. Todo: clean up the rest +use_to_altlabel @@ +{S, ahm:use, UseTerm} + <=> +rdf(UseUri, ahm:term, UseTerm), +rdf(S, ahm:term, AltLab), + {UseUri, skos:altLabel, AltLab}. + +use_to_altlabel @@ +{_S, ahm:use, _} + <=> +true. + + +% If it's not yet added, do it now. +use_to_altlabel @@ +{S, ahm:usedFor, AltLab} + <=> + not(rdf(S, skos:altLabel, AltLab)), + {S, skos:altLabel, AltLab}. + +use_to_altlabel @@ +{_, ahm:usedFor, _} + <=> + true. + + + + + +% ----------- LABELS --------- + +% preflabel (do this last, to avoid rewrite order problems +term_to_label @@ +{S, ahm:term, Term} + <=> +{S, skos:prefLabel, Term}. + + +% ----------- URIs --------- + +skos_uris @@ +{S, rdf:type, skos:'Concept'}, +{S, ahm:priref, literal(Pri)}, +{S, skos:prefLabel, literal(Term)} \ {S} +<=> +literal_to_id(['t',Pri, Term], ahm, URI), +{URI}. + + + +% ----------- CLEAN UP --------- + +% Category has only one unimportant triple +remove_category @@ +{_, ahm:category, _} + <=> +true. + +% all selected values are "false", can be removed +remove_selected @@ +{_, ahm:selected, _} + <=> +true. + + + diff --git a/examples/AHM/run-thesaurus.pl b/examples/AHM/run-thesaurus.pl new file mode 100644 index 0000000..7c2a9fe --- /dev/null +++ b/examples/AHM/run-thesaurus.pl @@ -0,0 +1,74 @@ +:- prolog_load_context(directory, Dir), + asserta(user:file_search_path(ahm, Dir)). + +user:file_search_path(xmlrdf, ahm('..')). +user:file_search_path(data, ahm('../../AHM')). +user:file_search_path(cliopatria, ahm('../../ClioPatria')). +user:file_search_path(getty, ahm('../../../eculture/RDF/vocabularies/getty')). + +:- load_files(library(semweb/rdf_db), [silent(true)]). + +:- rdf_register_ns(ahm, 'http://purl.org/collections/ahm/'). +:- rdf_register_ns(ulan, 'http://e-culture.multimedian.nl/ns/getty/ulan#'). +:- rdf_register_ns(aatned, 'http://e-culture.multimedian.nl/ns/rkd/aatned/'). 
+:- rdf_register_ns(skos, 'http://www.w3.org/2004/02/skos/core#'). +:- rdf_register_ns(foaf, 'http://xmlns.com/foaf/0.1/'). + +:- load_files([ cliopatria(cliopatria), + xmlrdf(xmlrdf), + library(semweb/rdf_cache), + library(semweb/rdf_library), + library(semweb/rdf_turtle_write) + ], [silent(true)]). +:- use_module(rewrite_thes). + +load_ontologies :- + rdf_attach_library(cliopatria(ontologies)), + rdf_load_library(dc), + rdf_load_library(skos), + rdf_load_library(rdfs), + rdf_load_library(owl), + absolute_file_name(data('rdf/ahm-voc-schema.ttl'), VocSchema, [access(read)]), + rdf_load(VocSchema,[graph(thesaurus_schema)]). + +:- initialization % run *after* loading this file + ensure_dir(cache), + rdf_set_cache_options([ global_directory('cache/rdf'), + create_global_directory(true) + ]), + load_ontologies. + + +ensure_dir(Dir) :- + exists_directory(Dir), !. +ensure_dir(Dir) :- + make_directory(Dir). + + + +load_thesaurus:- + absolute_file_name(data('src/thesaurus.xml'), File, + [ access(read) + ]), + load(File). + +load(File) :- + rdf_current_ns(ahm, Prefix), + load_xml_as_rdf(File, + [ dialect(xml), + unit(record), + prefix(Prefix), + graph(thesaurus) + ]). + + +run_thesaurus:- + load_thesaurus, + rewrite_thes:rewrite, + save_thesaurus. + +save_thesaurus:- + absolute_file_name(data('rdf/thesaurus.ttl'), File, + [ access(write) + ]), + rdf_save_turtle(File,[graph(thesaurus)]). diff --git a/examples/AHM/run.pl b/examples/AHM/run.pl new file mode 100644 index 0000000..b10c937 --- /dev/null +++ b/examples/AHM/run.pl @@ -0,0 +1,128 @@ +:- prolog_load_context(directory, Dir), + asserta(user:file_search_path(ahm, Dir)). + +user:file_search_path(xmlrdf, ahm('..')). +user:file_search_path(data, ahm('../../AHM')). +user:file_search_path(cliopatria, ahm('../../ClioPatria')). +user:file_search_path(getty, ahm('../../../eculture/RDF/vocabularies/getty')). + +:- load_files(library(semweb/rdf_db), [silent(true)]). + +:- rdf_register_ns(ahm, 'http://purl.org/collections/ahm/'). 
+:- rdf_register_ns(ulan,   'http://e-culture.multimedian.nl/ns/getty/ulan#').
+:- rdf_register_ns(aatned, 'http://e-culture.multimedian.nl/ns/rkd/aatned/').
+:- rdf_register_ns(skos,   'http://www.w3.org/2004/02/skos/core#').
+:- rdf_register_ns(foaf,   'http://xmlns.com/foaf/0.1/').
+
+% Load ClioPatria, xmlrdf and the semweb cache, library and Turtle
+% writer modules; silent(true) is passed to load_files/2 for all of them.
+:- load_files([ cliopatria(cliopatria),
+		xmlrdf(xmlrdf),
+		library(semweb/rdf_cache),
+		library(semweb/rdf_library),
+		library(semweb/rdf_turtle_write)
+	      ], [silent(true)]).
+:- use_module(rewrite).
+
+% load_ontologies
+%
+% Attach the RDF library found at cliopatria(ontologies), load its dc,
+% skos, rdfs and owl entries, then load data('rdf/ahm-schema.ttl')
+% (which must be readable).  No graph option is given for the schema.
+% Attaching the getty library is currently disabled (commented out).
+load_ontologies :-
+	rdf_attach_library(cliopatria(ontologies)),
+%	rdf_attach_library(getty(.)),
+	rdf_load_library(dc),
+	rdf_load_library(skos),
+	rdf_load_library(rdfs),
+	rdf_load_library(owl),
+	absolute_file_name(data('rdf/ahm-schema.ttl'), Schema, [access(read)]),
+	rdf_load(Schema).
+
+% Start-up goal: make sure the directory cache exists, set the RDF cache
+% options (global directory cache/rdf, create_global_directory(true)) and
+% load the ontologies.
+:- initialization			% run *after* loading this file
+	ensure_dir(cache),
+	rdf_set_cache_options([ global_directory('cache/rdf'),
+				create_global_directory(true)
+			      ]),
+	load_ontologies.
+
+% ensure_dir(+Dir)
+%
+% Succeeds if Dir already exists; otherwise creates it with
+% make_directory/1.
+ensure_dir(Dir) :-
+	exists_directory(Dir), !.
+ensure_dir(Dir) :-
+	make_directory(Dir).
+
+
+% Switch on debug messages for the topic xmlrdf.
+:- debug(xmlrdf).
+
+% load
+%
+% Resolve the directory data(src), expand the wildcard
+% collection-*.xml inside it and call load/1 on every matching file.
+load :-
+	absolute_file_name(data(src), Dir,
+			   [ file_type(directory)
+			   ]),
+	atom_concat(Dir, '/collection-*.xml', Pattern),
+	expand_file_name(Pattern, Files),
+	maplist(load, Files).
+
+% load_people_bob
+%
+% Load data('rdf/persons.ttl') into graph peoplebob and
+% data('rdf/ahm-people-schema.ttl') into graph peoplebob_schema.  Both
+% files must be readable.
+load_people_bob:-
+	absolute_file_name(data('rdf/persons.ttl'), File,
+			   [ access(read)
+			   ]),
+	rdf_load(File,[graph(peoplebob)]),
+	absolute_file_name(data('rdf/ahm-people-schema.ttl'), FileSchema,
+			   [ access(read)
+			   ]),
+	rdf_load(FileSchema,[graph(peoplebob_schema)]).
+
+% load_thesaurus
+%
+% Load data('rdf/thesaurus.ttl') into graph thesaurus and
+% data('rdf/ahm-voc-schema.ttl') into graph thesaurus_schema.  Both
+% files must be readable.
+load_thesaurus:-
+	absolute_file_name(data('rdf/thesaurus.ttl'), File,
+			   [ access(read)
+			   ]),
+	rdf_load(File,[graph(thesaurus)]),
+	absolute_file_name(data('rdf/ahm-voc-schema.ttl'), FileSchema,
+			   [ access(read)
+			   ]),
+	rdf_load(FileSchema,[graph(thesaurus_schema)]).
+
+
+% load(+File)
+%
+% Hand File to load_xml_as_rdf/2 with the options dialect(xml),
+% unit(record) and prefix(Prefix), where Prefix is whatever
+% rdf_current_ns/2 reports for ahm.  No graph option is passed here;
+% clean/0 and save/0 in this file work on the graph data, so that is
+% presumably the converter's default target -- TODO confirm.
+load(File) :-
+	rdf_current_ns(ahm, Prefix),
+	load_xml_as_rdf(File,
+			[ dialect(xml),
+			  unit(record),
+			  prefix(Prefix)
+			]).
+
+% clean
+%
+% Remove every triple held in the graphs data, peoplebob and thesaurus.
+clean :-
+	forall(member(Graph, [data, peoplebob, thesaurus]),
+	       rdf_retractall(_, _, _, Graph)).
+
+% readable_data_file(+Spec, -Path)
+%
+% Path is the absolute file name of data(Spec); the file must be
+% readable.
+readable_data_file(Spec, Path) :-
+	absolute_file_name(data(Spec), Path, [access(read)]).
+
+% sample
+%
+% Pass a single collection file to load/1.
+sample :-
+	readable_data_file('src/collection-11001-12001.xml', Path),
+	load(Path).
+
+
+% sample2
+%
+% Pass four collection files to load/1, in the order listed.  All four
+% names are resolved before the first file is loaded.
+sample2 :-
+	maplist(readable_data_file,
+		[ 'src/collection-11001-12001.xml',
+		  'src/collection-21001-22001.xml',
+		  'src/collection-41001-42001.xml',
+		  'src/collection-61001-62001.xml'
+		],
+		Paths),
+	maplist(load, Paths).
+
+
+% save
+%
+% Write the graph data as Turtle to data('rdf/ahm.ttl').
+save :-
+	absolute_file_name(data('rdf/ahm.ttl'), Target, [access(write)]),
+	rdf_save_turtle(Target, [graph(data)]).
+
+% run
+%
+% Complete pipeline: load the collection files, the people data and the
+% thesaurus, call rewrite/0 and save the result.
+run :-
+	load,
+	load_people_bob,
+	load_thesaurus,
+	rewrite,
+	save.
+
diff --git a/examples/AHM/util.pl b/examples/AHM/util.pl
new file mode 100644
index 0000000..44d8617
--- /dev/null
+++ b/examples/AHM/util.pl
@@ -0,0 +1,69 @@
+% Utility predicates for the AHM example conversion.
+
+:- module(util,
+	  [ role_to_property/2,		% +Role, -SubProperty
+	    concat_maybe/2,		% +ListofArgs, -OutputLiteral
+	    concat_maybe/3,		% +ListofArgs, +Separator, -OutputLiteral
+	    object_number_to_url/2,	% +Object Number -URL
+	    lang_to_langcode/2		% +AHMLangNr, -IsoLangCode
+
+	  ]).
+:- use_module(library(semweb/rdf_db)).
+:- use_module(xmlrdf(rdf_convert_util)).
+
+
+% role_to_property(+Role, -Property)
+%
+% Derive a creator sub-property from a creator Role.  Role is looked up
+% in rp1/2 and the name found there is passed, as a one-element list and
+% together with the prefix ahm, to literal_to_id/3, which yields
+% Property.  The empty role '' is mapped to the name maker.  Fails for
+% a Role that has no rp1/2 entry.
+
+%role_to_property('', 'http://purl.org/collections/ahm/maker').
+role_to_property(Role, Property) :-
+	rp1(Role, LocalName),
+	literal_to_id([LocalName], ahm, Property).
+
+% rp1(?Role, ?LocalName): table of known roles.
+rp1('',			   maker).
+rp1('glasblazer',	   glasblazer).
+rp1('glasgraveur',	   glasgraveur).
+rp1('goud- en zilversmid', goud_en_zilversmid).
+rp1('glasschilder',	   glasschilder).
+rp1('schilder',		   schilder).
+rp1('graveur',		   graveur).
+
+% ConcatMaybe concatenates list of atoms, skipping the unbound
+% variables.
This is used to make labels for bnodes. A separator can
+% also be passed; it is placed between consecutive bound elements only,
+% never after the last one.
+%
+% concat_maybe(+List, -Literal)
+%	Same as concat_maybe(List, ' ', Literal).
+% concat_maybe(+List, +Separator, -Literal)
+%	Literal is the atom made of the bound elements of List, in
+%	order, joined by Separator.  Literal is '' when List is empty
+%	or holds unbound variables only.  Deterministic.
+%
+% The previous clause-per-element version appended Separator after
+% every bound element, so labels ended in a stray separator
+% (e.g. 'a b '), and it left choice points behind.
+
+concat_maybe(List, Literal) :-
+	concat_maybe(List, ' ', Literal).
+
+concat_maybe(List, Sep, Literal) :-
+	exclude(var, List, Bound),	% library(apply), autoloaded like maplist/3
+	atomic_list_concat(Bound, Sep, Literal).
+
+% object_number_to_url(+ON, -URL)
+%
+% 'guess' the thumbnail url: spaces in the object number ON are replaced
+% by underscores, the result is appended to the fixed wwwopac
+% retrievecontent address and '.jpg' is added.  No other characters of
+% ON are rewritten or escaped.
+%
+object_number_to_url(ON, URL) :-
+	space_to_underscore(ON, ON1),
+	atom_concat('http://ahm.adlibsoft.com/wwwopacx/wwwopac.ashx?command=retrievecontent&imageserver=images&value=',ON1, Almost),
+	atom_concat(Almost, '.jpg', URL).
+
+% space_to_underscore(+In, -Out)
+%
+% Out is the atom In with every space character replaced by '_'.
+space_to_underscore(In, Out) :-
+	atom_codes(In, CodesIn),
+	maplist(map_space, CodesIn, CodesOut),
+	atom_codes(Out, CodesOut).
+
+% map_space(+CodeIn, -CodeOut): a space maps to an underscore, any other
+% character code maps to itself.
+map_space(0' , 0'_) :- !.
+map_space(C, C).
+
+
+
+% Language for vocabulary rewrite
+%
+% lang_to_langcode(?AHMLangNr, ?LangCode)
+%	Table from the language number (an atom) to a language code.
+%	Only '0', '1' and '3' are mapped; any other number fails.
+lang_to_langcode('0', en).
+lang_to_langcode('1', nl).
+lang_to_langcode('3', de).