=encoding utf8
=head1 NAME
rdf/parser/rdfa_core - RDFa Core 1.1 processor.
=head1 SYNOPSIS
from rdf/parser/rdfa_core import RdfaCoreParser, CurieExpander;
let parser := new RdfaCoreParser();
let quads := parser.parse_string(
"<doc xmlns:dc=\"http://purl.org/dc/terms/\" " _
"xmlns=\"http://example.com/\" " _
"about=\"http://example.com/book\" property=\"dc:title\">Moby Dick</doc>",
base: "http://example.com/",
);
let expander := new CurieExpander();
say( expander.expand("foaf:name") ); // http://xmlns.com/foaf/0.1/name
=head1 DESCRIPTION
This module implements the RDFa Core 1.1 processing model over a
DOM-like document tree. C<RdfaCoreParser> parses RDFa in generic XML
documents using C<std/data/xml>, with no host-language-specific
behaviour beyond C<xml:lang>, C<xml:base>, and C<xmlns:*> prefix
declarations.
The HTML and XHTML host languages are provided by
L<rdf/parser/html_rdfa> and L<rdf/parser/xhtml_rdfa>, which build on the
classes exported here.
=head1 EXPORTS
=head2 Classes
=over
=item C<CurieExpander>
Expands CURIEs, SafeCURIEs, and terms to full IRIs, seeded with the
RDFa 1.1 initial context. Construct with optional named arguments
C<iri_mappings>, C<term_mappings>, C<rel_rev_terms>, C<default_vocab>,
and C<base>.
=over
=item C<< expand(String value) -> String? >>
Expands a CURIE, SafeCURIE, or IRI to an absolute IRI string, resolving
relative IRIs against the base. Returns null when the value should be
ignored (e.g. an unresolvable SafeCURIE). Blank node identifiers are
returned unchanged.
=item C<< expand_curie(String value) -> String? >>
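Expands a bare CURIE (prefix:reference). The prefix is lowercased before
lookup. Returns null when the value contains no colon, when the prefix
is C<_> (a blank node identifier), or when the prefix has no mapping.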
=item C<< expand_curie_or_iri(String value) -> String? >>
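The value space of C<@about> and C<@resource>. A SafeCURIE (C<[...]>)
is expanded as a CURIE and yields null when it is empty or its prefix is
unmapped. Otherwise the value is tried as a CURIE and, failing that,
resolved as an IRI reference against the base. Blank node identifiers
are returned unchanged.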
=item C<< expand_term_or_curie_or_absiri(String value, Boolean rel_rev := false) -> String? >>
The value space of C<@rel>, C<@rev>, C<@property>, C<@typeof>, and
C<@datatype>. When a default vocabulary is set, a term is appended to
it. Otherwise the term is matched against the term mappings (exactly,
then case-insensitively), then against the host rel/rev terms when
C<rel_rev> is true. Relative IRIs are not resolved in this value space.
=item C<< resolve_iri(String value) -> String >>
Resolves an IRI reference against the base.
=item C<< with_prefixes(Dict extra) -> CurieExpander >>
=item C<< with_vocab(vocab) -> CurieExpander >>
=item C<< with_base(String base) -> CurieExpander >>
Return derived expanders; the original is unchanged.
=back
=item C<RdfaCoreParser>
An C<RdfParser> for RDFa in generic XML. Accepts the standard C<base>
and C<into> parser options plus C<vocab_expansion> (Boolean) and
C<vocab_loader> (a function from vocabulary IRI to an array of quads).
=item C<RdfaHost>
Host-language abstraction for generic XML. Subclassed by the HTML and
XHTML host languages.
=item C<RdfaProcessor>
The RDFa Core 1.1 processing engine. Normally used via the parser
classes.
=item C<RdfaContext>
The evaluation context threaded through element processing.
=item C<RdfaIncompleteTriple>
An incomplete triple pending completion by a descendant element.
=back
=head2 Constants
C<RDFA_NS>, C<XHV_NS>, C<RDFA_INITIAL_PREFIXES>, and
C<RDFA_INITIAL_TERMS>.
=head1 COPYRIGHT AND LICENCE
B<< rdf/parser/rdfa_core >> is copyright Toby Inkster.
It is free software; you may redistribute it and/or modify it under
the terms of either the Artistic License 1.0 or the GNU General Public
License version 2.
=cut
from rdf/parser import RdfParser;
from rdf/parser/common import RDFReader, _parser_result;
from rdf/term import
RDFBlank,
RDFLiteral,
RDF_NS,
XSD_NS,
rdf_blank,
rdf_iri,
rdf_literal,
rdf_quad,
rdf_term_key;
from rdf/graph import rdf_quads_unique;
from rdf/ns import XML_NS, XMLNS_NS;
from std/string import contains, ends_with, index, join, replace, split,
starts_with, substr, trim;
// Namespace IRI of the RDFa vocabulary; the processor builds
// rdfa:usesVocabulary, rdfa:copy and rdfa:Pattern from it.
const RDFA_NS := "http://www.w3.org/ns/rdfa#";
// Namespace IRI of the XHTML vocabulary; the processor uses it for
// role triples. The same IRI appears literally below as the value of
// the "" and "xhv" prefixes.
const XHV_NS := "http://www.w3.org/1999/xhtml/vocab#";
// Prefix mappings of the RDFa 1.1 initial context:
// https://www.w3.org/2011/rdfa-context/rdfa-1.1
// The empty prefix is the default prefix mapping (xhv).
// CurieExpander copies this table as its default iri_mappings and
// lowercases a prefix before looking it up, so every key is lowercase.
const RDFA_INITIAL_PREFIXES := {
"": "http://www.w3.org/1999/xhtml/vocab#",
"as": "https://www.w3.org/ns/activitystreams#",
"cc": "http://creativecommons.org/ns#",
"csvw": "http://www.w3.org/ns/csvw#",
"ctag": "http://commontag.org/ns#",
"dc": "http://purl.org/dc/terms/",
"dc11": "http://purl.org/dc/elements/1.1/",
"dcat": "http://www.w3.org/ns/dcat#",
"dcterms": "http://purl.org/dc/terms/",
"dqv": "http://www.w3.org/ns/dqv#",
"duv": "https://www.w3.org/ns/duv#",
"foaf": "http://xmlns.com/foaf/0.1/",
"gr": "http://purl.org/goodrelations/v1#",
"grddl": "http://www.w3.org/2003/g/data-view#",
"ical": "http://www.w3.org/2002/12/cal/icaltzd#",
"ldp": "http://www.w3.org/ns/ldp#",
"ma": "http://www.w3.org/ns/ma-ont#",
"oa": "http://www.w3.org/ns/oa#",
"odrl": "http://www.w3.org/ns/odrl/2/",
"og": "http://ogp.me/ns#",
"org": "http://www.w3.org/ns/org#",
"owl": "http://www.w3.org/2002/07/owl#",
"prov": "http://www.w3.org/ns/prov#",
"qb": "http://purl.org/linked-data/cube#",
"rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
"rdfa": "http://www.w3.org/ns/rdfa#",
"rdfs": "http://www.w3.org/2000/01/rdf-schema#",
"rev": "http://purl.org/stuff/rev#",
"rif": "http://www.w3.org/2007/rif#",
"rr": "http://www.w3.org/ns/r2rml#",
"schema": "http://schema.org/",
"sd": "http://www.w3.org/ns/sparql-service-description#",
"sioc": "http://rdfs.org/sioc/ns#",
"skos": "http://www.w3.org/2004/02/skos/core#",
"skosxl": "http://www.w3.org/2008/05/skos-xl#",
"sosa": "http://www.w3.org/ns/sosa/",
"ssn": "http://www.w3.org/ns/ssn/",
"time": "http://www.w3.org/2006/time#",
"v": "http://rdf.data-vocabulary.org/#",
"vcard": "http://www.w3.org/2006/vcard/ns#",
"void": "http://rdfs.org/ns/void#",
"wdr": "http://www.w3.org/2007/05/powder#",
"wdrs": "http://www.w3.org/2007/05/powder-s#",
"xhv": "http://www.w3.org/1999/xhtml/vocab#",
"xml": "http://www.w3.org/XML/1998/namespace",
"xsd": "http://www.w3.org/2001/XMLSchema#",
};
// Term mappings (bare term -> IRI). CurieExpander copies this table as
// its default term_mappings, and RdfaHost.terms() returns it.
const RDFA_INITIAL_TERMS := {
"describedby": "http://www.w3.org/2007/05/powder-s#describedby",
"license": "http://www.w3.org/1999/xhtml/vocab#license",
"role": "http://www.w3.org/1999/xhtml/vocab#role",
};
// Escape character data for inclusion in serialized XML text content.
// Returns a copy of `value` with "&", "<" and ">" replaced by their
// predefined entities. "&" must be handled first; otherwise the
// ampersands introduced by the later replacements would be escaped a
// second time.
function _rdfa_escape_text ( String value ) {
    let out := replace( value, "&", "&amp;", "g" );
    out := replace( out, "<", "&lt;", "g" );
    out := replace( out, ">", "&gt;", "g" );
    return out;
}
// Escape a value for inclusion in a double-quoted XML attribute: the
// text escapes above, plus the double quote that delimits the value.
function _rdfa_escape_attr ( String value ) {
    return replace( _rdfa_escape_text(value), "\"", "&quot;", "g" );
}
// Resolve the IRI reference `value` against `base`. With an empty base
// there is nothing to resolve against, so `value` is returned as given.
// Resolution itself is delegated to a throwaway RDFReader.
function _rdfa_resolve_against ( String base, String value ) {
    if ( base eq "" ) {
        return value;
    }
    let reader := new RDFReader(source: "");
    return reader.set_base(base).resolve_iri(value);
}
// Return `iri` without its fragment: everything before the first "#".
// An IRI with no "#" is returned unchanged.
function _rdfa_strip_fragment ( String iri ) {
    let hash := index( iri, "#" );
    if ( hash < 0 ) {
        return iri;
    }
    return substr( iri, 0, hash );
}
// Break an attribute value into its whitespace-separated tokens. The
// value is trimmed first, and any empty piece left by the split is
// discarded, so the result never contains "".
function _rdfa_tokens ( String value ) {
    let tokens := [];
    let pieces := split( trim(value), /\s+/ );
    for ( let piece in pieces ) {
        if ( piece ne "" ) {
            tokens.push(piece);
        }
    }
    return tokens;
}
// Report whether `value` starts with an IRI scheme: a letter, then any
// letters, digits, "+", "." or "-", then a colon.
function _rdfa_looks_absolute ( String value ) {
    let has_scheme := value ~ /^[A-Za-z][A-Za-z0-9+.\-]*:/;
    return has_scheme;
}
// Expands CURIEs, SafeCURIEs and terms into IRI strings. The with_*
// methods never modify the receiver; they hand back another expander
// (with_prefixes returns the receiver itself when given nothing to add).
class CurieExpander {
    // prefix -> IRI; defaults to a copy of the RDFa initial context.
    let Dict iri_mappings with get := RDFA_INITIAL_PREFIXES.copy();
    // term -> IRI; defaults to a copy of the initial term table.
    let Dict term_mappings with get := RDFA_INITIAL_TERMS.copy();
    // Extra terms consulted only for @rel/@rev (looked up lowercased).
    let Dict rel_rev_terms with get := {};
    // Default vocabulary IRI that bare terms are appended to, or null.
    let default_vocab with get := null;
    // Base IRI for resolving IRI references; "" disables resolution.
    let String base with get := "";

    // Build an expander that shares this one's term tables but uses the
    // given prefix map, default vocabulary and base.
    method _derive ( Dict mappings, vocab, String new_base ) {
        return new CurieExpander(
            iri_mappings: mappings,
            term_mappings: term_mappings,
            rel_rev_terms: rel_rev_terms,
            default_vocab: vocab,
            base: new_base,
        );
    }

    // Return an expander with the prefixes in `extra` layered over the
    // current ones. Prefix names are lowercased. With nothing to add,
    // the receiver itself is returned.
    method with_prefixes ( Dict extra ) {
        let added := extra.keys();
        if ( added.length() == 0 ) {
            return self;
        }
        let combined := iri_mappings.copy();
        for ( let name in added ) {
            combined.set( lc(name), extra.get(name) );
        }
        return self._derive( combined, default_vocab, base );
    }

    // Return an expander whose default vocabulary is `vocab` (which may
    // be null to clear it).
    method with_vocab ( vocab ) {
        return self._derive( iri_mappings, vocab, base );
    }

    // Return an expander that resolves against `new_base`.
    method with_base ( String new_base ) {
        return self._derive( iri_mappings, default_vocab, new_base );
    }

    // Resolve an IRI reference against the base.
    method resolve_iri ( String value ) {
        return _rdfa_resolve_against( base, value );
    }

    // Expand a bare CURIE (prefix:reference). Returns null when there
    // is no colon, when the prefix is "_", or when the lowercased
    // prefix has no mapping.
    method expand_curie ( String value ) {
        // Cut at the first colon with index/substr instead of a limit-2
        // split: zuzu-js truncates the tail, which breaks references
        // that themselves contain ":".
        let split_at := index( value, ":" );
        if ( split_at < 0 ) {
            return null;
        }
        let name := lc(substr( value, 0, split_at ));
        if ( name eq "_" ) {
            return null;
        }
        if ( not iri_mappings.exists(name) ) {
            return null;
        }
        return iri_mappings.get(name) _ substr( value, split_at + 1 );
    }

    // Value space of @about and @resource: SafeCURIE, CURIE, blank node
    // identifier, or IRI reference (resolved against the base).
    method expand_curie_or_iri ( String value ) {
        if ( starts_with( value, "[" ) and ends_with( value, "]" ) ) {
            // SafeCURIE: only the bracketed content is considered, and
            // it is never treated as a plain IRI.
            let body := substr( value, 1, length value - 2 );
            if ( body eq "" ) {
                return null;
            }
            if ( starts_with( body, "_:" ) ) {
                return body;
            }
            return self.expand_curie(body);
        }
        if ( starts_with( value, "_:" ) ) {
            return value;
        }
        let via_curie := self.expand_curie(value);
        if ( via_curie == null ) {
            return self.resolve_iri(value);
        }
        return via_curie;
    }

    // Expand a colon-free term (RDFa Core 1.1 section 7.4.3): the local
    // default vocabulary wins, then the term mappings (exact match,
    // then case-insensitive), then the host rel/rev terms when
    // `rel_rev` is true. Returns null when nothing matches.
    method _expand_term ( String term, Boolean rel_rev ) {
        if ( not (default_vocab == null) ) {
            return default_vocab _ term;
        }
        if ( term_mappings.exists(term) ) {
            return term_mappings.get(term);
        }
        let folded := lc(term);
        for ( let known in term_mappings.keys() ) {
            if ( lc(known) eq folded ) {
                return term_mappings.get(known);
            }
        }
        if ( rel_rev and rel_rev_terms.exists(folded) ) {
            return rel_rev_terms.get(folded);
        }
        return null;
    }

    // Value space of @rel, @rev, @property, @typeof and @datatype: a
    // term, a CURIE, a blank node identifier, or an absolute IRI.
    // Relative IRIs are not resolved here; they yield null.
    method expand_term_or_curie_or_absiri ( String value, Boolean rel_rev := false ) {
        if ( value eq "" ) {
            return null;
        }
        if ( not contains( value, ":" ) ) {
            return self._expand_term( value, rel_rev );
        }
        if ( starts_with( value, "_:" ) ) {
            return value;
        }
        let via_curie := self.expand_curie(value);
        if ( not (via_curie == null) ) {
            return via_curie;
        }
        return _rdfa_looks_absolute(value) ? value : null;
    }

    // General-purpose entry point; same behaviour as
    // expand_curie_or_iri.
    method expand ( String value ) {
        return self.expand_curie_or_iri(value);
    }
}
// A triple that is waiting for its other end. process_element creates
// these for @rel/@rev predicates when the element has no object
// resource, and completes them from a descendant element that
// establishes a subject.
class RdfaIncompleteTriple {
// Predicate term of the pending triple.
let predicate with get := null;
// How the pending triple is completed: "forward" emits
// (parent subject, predicate, new subject); "reverse" emits
// (new subject, predicate, parent subject); "list" pushes the new
// subject onto `list` instead of emitting a triple.
let String direction with get := ""; // "forward", "reverse", or "list"
let list with get := null; // for "list": shared array of items
}
// Evaluation context handed from an element to its children by
// RdfaProcessor.process_element.
class RdfaContext {
// Subject established by the nearest ancestor; used as the other end
// when completing incomplete triples.
let parent_subject with get := null;
// Object resource established by the parent element, which a child may
// take as its subject. Left null in the context built for the root.
let parent_object with get := null;
// RdfaIncompleteTriple objects awaiting completion by this element.
let Array incomplete with get := [];
// Pending list contents, keyed by rdf_term_key(predicate); each entry
// is a dict { predicate, items }.
let Dict list_mappings with get := {};
// The subject the inherited list mapping attaches to; a fresh
// mapping is needed when the effective subject changes.
let list_subject with get := null;
// Document-declared namespaces in scope ("xmlns" or "xmlns:p" ->
// IRI), preserved when serializing XMLLiterals.
let Dict ns_decls with get := {};
// CurieExpander in scope (prefixes, default vocabulary, base).
let expander with get := null;
// Language tag in scope; "" when none has been declared.
let String lang with get := "";
}
// Host-language abstraction for generic XML, backed by the
// std/data/xml DOM. The HTML and XHTML host languages subclass this.
class RdfaHost {
    // Normalize the attributes of `node` to records of the form
    // { ns, local, name, value }. The namespace is taken from the
    // qualified name wherever possible because the three runtimes'
    // std/data/xml implementations disagree about namespaceURI() on
    // attribute nodes; namespaceURI() is consulted only for prefixes
    // other than "xml" and "xmlns".
    method attr_records ( node ) {
        let records := [];
        for ( let attr in node.attributes() ) {
            let qname := attr.nodeName();
            let ns_iri := "";
            let local_part := qname;
            if ( qname eq "xmlns" ) {
                ns_iri := XMLNS_NS;
            }
            else if ( contains( qname, ":" ) ) {
                let sep := index( qname, ":" );
                let lead := substr( qname, 0, sep );
                local_part := substr( qname, sep + 1 );
                if ( lead eq "xml" ) {
                    ns_iri := XML_NS;
                }
                else if ( lead eq "xmlns" ) {
                    ns_iri := XMLNS_NS;
                }
                else {
                    let reported := attr.namespaceURI();
                    ns_iri := reported == null ? "" : "" _ reported;
                }
            }
            records.push({ ns: ns_iri, local: local_part, name: qname, value: attr.nodeValue() });
        }
        return records;
    }

    // Value of the un-namespaced attribute `name`, or null if absent.
    method get_attr ( node, String name ) {
        for ( let rec in self.attr_records(node) ) {
            if ( rec{ns} eq "" and rec{name} eq name ) {
                return rec{value};
            }
        }
        return null;
    }

    // Value of xml:lang on this element, or null if absent.
    method get_lang ( node ) {
        for ( let rec in self.attr_records(node) ) {
            if ( rec{ns} eq XML_NS and rec{local} eq "lang" ) {
                return rec{value};
            }
        }
        return null;
    }

    // Prefix declarations made on this element via xmlns:*, returned as
    // a dict of lowercased prefix -> IRI. The default xmlns declaration
    // is not included.
    method scan_prefixes ( node ) {
        let found := {};
        for ( let rec in self.attr_records(node) ) {
            let qname := rec{name};
            if ( starts_with( qname, "xmlns:" ) ) {
                found.set( lc(substr( qname, 6 )), rec{value} );
            }
            else if ( rec{ns} eq XMLNS_NS and rec{local} ne "xmlns" ) {
                found.set( lc(rec{local}), rec{value} );
            }
        }
        return found;
    }

    // Per-element base adjustment: generic XML honours xml:base. The
    // attribute is resolved against `current` and stripped of any
    // fragment; without xml:base, `current` is returned.
    method element_base ( node, String current ) {
        for ( let rec in self.attr_records(node) ) {
            if ( rec{ns} eq XML_NS and rec{local} eq "base" ) {
                let resolved := _rdfa_resolve_against( current, rec{value} );
                return _rdfa_strip_fragment(resolved);
            }
        }
        return current;
    }

    // Document-level base (e.g. <base href> in (X)HTML). Generic XML
    // has none, so the supplied default is used.
    method doc_base ( root, String default_base ) {
        return default_base;
    }

    // Text content used for plain literals.
    method text_content ( node ) {
        return node.textContent();
    }

    // The default XML namespace declared on this element, if any.
    method default_xmlns ( node ) {
        for ( let rec in self.attr_records(node) ) {
            if ( rec{name} eq "xmlns" ) {
                return rec{value};
            }
        }
        return null;
    }

    // Serialize the children of `node` for an XMLLiteral. The in-scope
    // namespace declarations in `ns_decls` are injected into each
    // top-level element so the literal stays self-contained.
    method serialize_children ( node, Dict ns_decls := {} ) {
        let markup := "";
        for ( let child in node.childNodes() ) {
            markup _= self._serialize_node( child, ns_decls );
        }
        return markup;
    }

    // Serialize one node. Text is escaped, comments are written back
    // verbatim, elements are written with their attributes followed by
    // any `inject` declarations they do not already carry (in sorted
    // order). Every other node kind serializes to "".
    method _serialize_node ( node, Dict inject ) {
        let kind := node.nodeKind();
        if ( kind eq "text" ) {
            return _rdfa_escape_text(node.textContent());
        }
        if ( kind eq "comment" ) {
            return "<!--" _ node.textContent() _ "-->";
        }
        if ( kind ne "element" ) {
            return "";
        }
        let tag := node.nodeName();
        let markup := "<" _ tag;
        let present := {};
        for ( let rec in self.attr_records(node) ) {
            present.set( rec{name}, true );
            markup _= " " _ rec{name} _ "=\"" _ _rdfa_escape_attr(rec{value}) _ "\"";
        }
        let extra := inject.keys().sort( function ( left, right ) {
            return left cmp right;
        });
        for ( let decl in extra ) {
            if ( not present.exists(decl) ) {
                markup _= " " _ decl _ "=\"" _ _rdfa_escape_attr(inject.get(decl)) _ "\"";
            }
        }
        markup _= ">";
        // Only top-level elements receive injected declarations.
        for ( let child in node.childNodes() ) {
            markup _= self._serialize_node( child, {} );
        }
        markup _= "</" _ tag _ ">";
        return markup;
    }

    // Host hook for HTML @datetime / <time>; returns
    // { value, datatype } or null. Generic XML has no override.
    method value_override ( node ) {
        return null;
    }

    // Host hook: whether `node` is the host language's head or body
    // element. Never true for generic XML.
    method is_head_or_body ( node ) {
        return false;
    }

    // Initial term mappings for this host.
    method terms () {
        // Go through a local: zuzu-rust returns Null for a bare
        // `return MODULE_GLOBAL;` inside a method.
        let table := RDFA_INITIAL_TERMS;
        return table;
    }

    // Extra terms recognised only in @rel/@rev; none for generic XML.
    method rel_rev_terms () {
        return {};
    }

    // Default vocabulary of the host language; none for generic XML.
    method default_vocab () {
        return null;
    }

    // Whether rdfa:copy / rdfa:Pattern property copying is applied
    // after processing.
    method property_copying () {
        return false;
    }

    // HTML+RDFa: with @property present, @rel/@rev values that are not
    // CURIEs or absolute IRIs are ignored entirely. Off for generic XML.
    method rel_rev_needs_curie_with_property () {
        return false;
    }
}
// The RDFa processing engine: walks a document tree from run() and
// accumulates the resulting quads.
class RdfaProcessor {
// Host-language object (an RdfaHost or subclass) consulted for
// attributes, base, terms and host-specific hooks. No default.
let host with get;
// Quads emitted so far, in emission order.
let Array quads with get := [];
// Counter behind generated blank node labels ("rdfa" _ counter).
let Number bnode_counter := 0;
// Cache of document blank nodes, keyed by "_:" _ label, so a label
// always maps to the same blank node term.
let Dict bnode_labels := {};
// Vocabulary IRIs seen in non-empty @vocab attributes (used as a set).
let Dict vocabularies_used := {};
// When true, run() performs vocabulary expansion after processing.
let Boolean vocab_expansion := false;
// Optional function from vocabulary IRI to its quads; when null,
// expand_vocabularies falls back to a default loader.
let vocab_loader := null;
// The document's own URL: CURIEs whose prefix maps to a relative
// IRI resolve against this, not against any in-document <base>.
let String doc_origin := "";
// Process the document rooted at `root`, using `base` (minus any
// fragment) as the document origin, and return the de-duplicated quads.
// Property copying and vocabulary expansion run afterwards when the
// host / the vocab_expansion flag ask for them.
method run ( root, String base ) {
    let origin := _rdfa_strip_fragment(base);
    doc_origin := origin;
    // The host may substitute a document-level base (e.g. <base href>).
    let effective_base := host.doc_base( root, origin );
    let root_expander := new CurieExpander(
        term_mappings: host.terms(),
        rel_rev_terms: host.rel_rev_terms(),
        default_vocab: host.default_vocab(),
        base: effective_base,
    );
    let root_ctx := new RdfaContext(
        parent_subject: rdf_iri(effective_base),
        expander: root_expander,
    );
    self.process_element( root, root_ctx );
    if ( host.property_copying() ) {
        self.apply_patterns();
    }
    if ( vocab_expansion ) {
        self.expand_vocabularies();
    }
    return rdf_quads_unique(quads);
}
// Mint a fresh blank node labelled "rdfa" followed by the next counter
// value.
method new_bnode () {
    bnode_counter := bnode_counter + 1;
    let label := "rdfa" _ bnode_counter;
    return rdf_blank(label);
}
// Blank node for a document label ("_:label" with the "_:" already
// removed). The same label always yields the same term. Labelled nodes
// are renamed "b_" _ label, and the empty label gets a generated
// "rdfa" _ counter name, which keeps them apart from new_bnode() output.
method named_bnode ( String label ) {
    let slot := "_:" _ label;
    if ( bnode_labels.exists(slot) ) {
        return bnode_labels.get(slot);
    }
    // The counter advances for every new label, used or not.
    bnode_counter := bnode_counter + 1;
    let fresh_name := label eq "" ? "rdfa" _ bnode_counter : "b_" _ label;
    bnode_labels.set( slot, rdf_blank(fresh_name) );
    return bnode_labels.get(slot);
}
// Record the triple (s, p, o) as a quad at the end of `quads`.
method emit ( s, p, o ) {
    let quad := rdf_quad( s, p, o );
    quads.push(quad);
}
// Turn an expanded string into a subject/object term: null stays null,
// "_:label" becomes the blank node for that label, and anything else
// becomes an IRI (resolved against the document origin if relative).
method _resource_term ( expanded ) {
    if ( expanded == null ) {
        return null;
    }
    if ( starts_with( expanded, "_:" ) ) {
        return self.named_bnode(substr( expanded, 2 ));
    }
    let iri := self._against_origin(expanded);
    return rdf_iri(iri);
}
// Turn an expanded string into a predicate term. Predicates must be
// IRIs, so both null and blank node identifiers yield null.
method _predicate_term ( expanded ) {
    if ( expanded == null ) {
        return null;
    }
    if ( starts_with( expanded, "_:" ) ) {
        return null;
    }
    let iri := self._against_origin(expanded);
    return rdf_iri(iri);
}
// Make `iri` absolute if it is not already. A relative result can only
// come from a CURIE whose prefix was declared with a relative IRI; it
// resolves against the document origin (suite test 0319). With no
// origin recorded the value is passed through untouched.
method _against_origin ( String iri ) {
    if ( doc_origin eq "" ) {
        return iri;
    }
    if ( _rdfa_looks_absolute(iri) ) {
        return iri;
    }
    return _rdfa_resolve_against( doc_origin, iri );
}
// Expand an @about/@resource value and convert it to a term, or null
// when the value should be ignored.
method _resolve_resource ( String value, CurieExpander expander ) {
    let expanded := expander.expand_curie_or_iri(value);
    return self._resource_term(expanded);
}
// Expand every whitespace-separated token of `value` to a predicate
// term, dropping tokens that do not expand to an IRI. `rel_rev` enables
// the host's rel/rev term table. Token order is preserved.
method _resolve_predicates ( String value, CurieExpander expander, Boolean rel_rev ) {
    let preds := [];
    for ( let token in _rdfa_tokens(value) ) {
        let expanded := expander.expand_term_or_curie_or_absiri( token, rel_rev );
        let pred := self._predicate_term(expanded);
        next if pred == null;
        preds.push(pred);
    }
    return preds;
}
// Expand every whitespace-separated token of an @typeof value to a
// resource term (IRI or blank node), dropping tokens that do not
// expand. Token order is preserved.
method _resolve_types ( String value, CurieExpander expander ) {
    let types := [];
    for ( let token in _rdfa_tokens(value) ) {
        let type_term := self._resource_term(
            expander.expand_term_or_curie_or_absiri( token, false ),
        );
        if ( not (type_term == null) ) {
            types.push(type_term);
        }
    }
    return types;
}
// Process one element and, recursively, its element children. The step
// numbers in the comments below refer to the processing sequence in
// RDFa Core 1.1 section 7.5. Triples are recorded through self.emit()
// as a side effect. `ctx` is the evaluation context inherited from the
// parent element; it is never reassigned, but the list mappings and
// incomplete-triple lists it references are shared and may be appended
// to here. A new RdfaContext is built for the children.
method process_element ( node, RdfaContext ctx ) {
let expander := ctx.get_expander();
let host_obj := host;
// xml:base (generic XML only; no-op for the HTML hosts)
let base := host_obj.element_base( node, expander.get_base() );
expander := expander.with_base(base) if base ne expander.get_base();
// Step 2: @vocab
let vocab_attr := host_obj.get_attr( node, "vocab" );
if ( not (vocab_attr == null) ) {
if ( trim(vocab_attr) eq "" ) {
// An empty @vocab resets to the host's default vocabulary.
expander := expander.with_vocab( host_obj.default_vocab() );
}
else {
let vocab_iri := expander.resolve_iri(trim(vocab_attr));
expander := expander.with_vocab(vocab_iri);
// Remembered for the optional vocabulary expansion pass in run().
vocabularies_used.set( vocab_iri, true );
self.emit(
rdf_iri(base),
rdf_iri(RDFA_NS _ "usesVocabulary"),
rdf_iri(vocab_iri),
);
}
}
// Step 3: prefix mappings — xmlns:* first, then @prefix wins.
let new_prefixes := host_obj.scan_prefixes(node);
let prefix_attr := host_obj.get_attr( node, "prefix" );
if ( not (prefix_attr == null) ) {
// @prefix is a flat list of "name: IRI" token pairs; a pair whose
// name does not end in ":" (or is just ":") is skipped.
let tokens := _rdfa_tokens(prefix_attr);
let i := 0;
while ( i + 1 < tokens.length() ) {
let name := tokens[i];
if ( ends_with( name, ":" ) ) {
let prefix := substr( name, 0, length name - 1 );
new_prefixes.set( lc(prefix), tokens[i + 1] ) if prefix ne "";
}
i := i + 2;
}
}
expander := expander.with_prefixes(new_prefixes);
// Track document-declared namespaces (xmlns, xmlns:*, @prefix)
// for XMLLiteral serialization.
let ns_decls := ctx.get_ns_decls();
let xmlns_default := host_obj.default_xmlns(node);
if ( new_prefixes.keys().length() > 0 or not (xmlns_default == null) ) {
// Copy before modifying so the parent's dict is left untouched.
ns_decls := ns_decls.copy();
for ( let p in new_prefixes.keys() ) {
ns_decls.set( "xmlns:" _ p, new_prefixes.get(p) );
}
ns_decls.set( "xmlns", xmlns_default ) if not (xmlns_default == null);
}
// Step 4: language
let lang := ctx.get_lang();
let lang_attr := host_obj.get_lang(node);
lang := lang_attr if not (lang_attr == null);
// Gather the RDFa attributes.
let about_attr := host_obj.get_attr( node, "about" );
let resource_attr := host_obj.get_attr( node, "resource" );
let href_attr := host_obj.get_attr( node, "href" );
let src_attr := host_obj.get_attr( node, "src" );
let typeof_attr := host_obj.get_attr( node, "typeof" );
let property_attr := host_obj.get_attr( node, "property" );
let content_attr := host_obj.get_attr( node, "content" );
let datatype_attr := host_obj.get_attr( node, "datatype" );
let rel_attr := host_obj.get_attr( node, "rel" );
let rev_attr := host_obj.get_attr( node, "rev" );
let inlist_attr := host_obj.get_attr( node, "inlist" );
// HTML+RDFa: with @property, @rel/@rev values that are not
// CURIEs/IRIs are dropped; if nothing remains the attribute is
// treated as absent.
if ( not (property_attr == null) and host_obj.rel_rev_needs_curie_with_property() ) {
rel_attr := self._strip_term_tokens(rel_attr);
rev_attr := self._strip_term_tokens(rev_attr);
}
// Presence of the attribute, not of resolvable predicates, selects the
// @rel/@rev code paths below.
let has_rel := not (rel_attr == null);
let has_rev := not (rev_attr == null);
let rel_preds := has_rel ?
self._resolve_predicates( rel_attr, expander, true ) : [];
let rev_preds := has_rev ?
self._resolve_predicates( rev_attr, expander, true ) : [];
let is_root := node.parentNode() == null or
node.parentNode().nodeKind() eq "document";
let new_subject := null;
let current_object_resource := null;
let typed_resource := null;
// When set, this element passes its parent's context through unchanged.
let skip := false;
let about_term := not (about_attr == null) ?
self._resolve_resource( about_attr, expander ) : null;
// Object resource precedence: @resource, then @href, then @src.
let resource_term := not (resource_attr == null) ?
self._resolve_resource( resource_attr, expander ) : null;
resource_term := self._resource_term(expander.resolve_iri(href_attr))
if resource_term == null and not (href_attr == null);
resource_term := self._resource_term(expander.resolve_iri(src_attr))
if resource_term == null and not (src_attr == null);
if ( not has_rel and not has_rev ) {
if ( not (property_attr == null) and content_attr == null and datatype_attr == null ) {
// Step 5.1
if ( not (about_term == null) ) {
new_subject := about_term;
}
else if ( host_obj.is_head_or_body(node) and not (ctx.get_parent_object() == null) ) {
new_subject := ctx.get_parent_object();
}
else if ( is_root ) {
new_subject := rdf_iri(expander.resolve_iri(""));
}
else if ( not (ctx.get_parent_object() == null) ) {
new_subject := ctx.get_parent_object();
}
if ( not (typeof_attr == null) ) {
if ( not (about_term == null) ) {
typed_resource := about_term;
}
else if ( is_root ) {
typed_resource := new_subject;
}
else {
typed_resource := not (resource_term == null) ?
resource_term : self.new_bnode();
current_object_resource := typed_resource;
}
}
}
else {
// Step 5.2
if ( not (about_term == null) ) {
new_subject := about_term;
}
else if ( not (resource_term == null) ) {
new_subject := resource_term;
}
else if ( host_obj.is_head_or_body(node) and not (ctx.get_parent_object() == null) ) {
new_subject := ctx.get_parent_object();
}
else if ( is_root ) {
new_subject := rdf_iri(expander.resolve_iri(""));
}
else if ( not (typeof_attr == null) ) {
new_subject := self.new_bnode();
}
else if ( not (ctx.get_parent_object() == null) ) {
new_subject := ctx.get_parent_object();
skip := true if property_attr == null;
}
typed_resource := new_subject if not (typeof_attr == null);
}
}
else {
// Step 6: @rel/@rev present
if ( not (about_term == null) ) {
new_subject := about_term;
typed_resource := new_subject if not (typeof_attr == null);
}
else if ( host_obj.is_head_or_body(node) and not (ctx.get_parent_object() == null) ) {
new_subject := ctx.get_parent_object();
typed_resource := new_subject if not (typeof_attr == null);
}
else if ( is_root ) {
new_subject := rdf_iri(expander.resolve_iri(""));
typed_resource := new_subject if not (typeof_attr == null);
}
else if ( not (ctx.get_parent_object() == null) ) {
new_subject := ctx.get_parent_object();
}
current_object_resource := resource_term;
// Without @about (and outside root/head/body), @typeof types the
// object resource, minting a blank node if there is none.
if ( not (typeof_attr == null) and about_term == null and
not ( is_root or host_obj.is_head_or_body(node) ) ) {
current_object_resource := self.new_bnode()
if current_object_resource == null;
typed_resource := current_object_resource;
}
}
// Step 7: @typeof
if ( not (typed_resource == null) and not (typeof_attr == null) ) {
for ( let type_term in self._resolve_types( typeof_attr, expander ) ) {
self.emit( typed_resource, rdf_iri(RDF_NS _ "type"), type_term );
}
}
// Step 8: fresh list mappings when the subject that list items
// attach to differs from the one owning the inherited mapping.
// (Core §7.5 step 8 compares against the parent object, but the
// official suite — 0225/0226/0227 — requires ownership tracking.)
let local_list_mappings := ctx.get_list_mappings();
let created_list_mappings := false;
let list_owner := ctx.get_list_subject();
if ( not (new_subject == null) and ( list_owner == null or
rdf_term_key(new_subject) ne rdf_term_key(list_owner) ) ) {
local_list_mappings := {};
created_list_mappings := true;
list_owner := new_subject;
}
// Steps 9/10: @rel/@rev triples or incomplete triples
let local_incomplete := [];
if ( has_rel or has_rev ) {
if ( not (current_object_resource == null) ) {
// Step 9: the object is known, so the triples can be completed now.
for ( let pred in rel_preds ) {
if ( not (inlist_attr == null) ) {
self._list_append( local_list_mappings, pred, current_object_resource );
}
else {
self.emit( new_subject, pred, current_object_resource );
}
}
for ( let pred in rev_preds ) {
self.emit( current_object_resource, pred, new_subject );
}
}
else {
// Step 10: no object yet. A blank node stands in as the object
// resource and the triples are left for descendants to complete.
current_object_resource := self.new_bnode();
for ( let pred in rel_preds ) {
if ( not (inlist_attr == null) ) {
let items := self._list_for( local_list_mappings, pred );
local_incomplete.push(new RdfaIncompleteTriple(
predicate: pred,
direction: "list",
list: items,
));
}
else {
local_incomplete.push(new RdfaIncompleteTriple(
predicate: pred,
direction: "forward",
));
}
}
for ( let pred in rev_preds ) {
local_incomplete.push(new RdfaIncompleteTriple(
predicate: pred,
direction: "reverse",
));
}
}
}
// Step 11: @property
if ( not (property_attr == null) ) {
let datatype_term := null;
let datatype_resolved := false;
if ( not (datatype_attr == null) and trim(datatype_attr) ne "" ) {
let dt := expander.expand_term_or_curie_or_absiri( trim(datatype_attr), false );
if ( not (dt == null) and not starts_with( dt, "_:" ) ) {
datatype_term := rdf_iri(dt);
datatype_resolved := true;
}
}
// Choose the object value. The branches are tried in this order:
// XMLLiteral, @content, host value override, typed text content,
// resource object, typed resource, plain text content.
let value_obj := null;
if ( datatype_resolved and
rdf_term_key(datatype_term) eq rdf_term_key(rdf_iri(RDF_NS _ "XMLLiteral")) ) {
value_obj := rdf_literal(
host_obj.serialize_children( node, ns_decls ),
"",
datatype_term,
);
}
else if ( not (content_attr == null) ) {
value_obj := datatype_resolved ?
rdf_literal( content_attr, "", datatype_term ) :
rdf_literal( content_attr, lang );
}
else {
let override := host_obj.value_override(node);
if ( not (override == null) ) {
// An explicit @datatype beats the datatype suggested by the host.
if ( datatype_resolved ) {
value_obj := rdf_literal( override{value}, "", datatype_term );
}
else if ( not (override{datatype} == null) ) {
value_obj := rdf_literal( override{value}, "", rdf_iri(override{datatype}) );
}
else {
value_obj := rdf_literal( override{value}, lang );
}
}
else if ( datatype_resolved ) {
value_obj := rdf_literal( host_obj.text_content(node), "", datatype_term );
}
else if ( not has_rel and not has_rev and content_attr == null and
datatype_attr == null and not (resource_term == null) ) {
value_obj := resource_term;
}
// Typed resource becomes the object only when @about is
// absent entirely; a present-but-unresolvable @about (e.g.
// "[]") still blocks this path (suite test 0297).
else if ( not (typeof_attr == null) and about_attr == null and
datatype_attr == null and not (typed_resource == null) ) {
value_obj := typed_resource;
}
else {
value_obj := rdf_literal( host_obj.text_content(node), lang );
}
}
for ( let pred in self._resolve_predicates( property_attr, expander, false ) ) {
if ( not (inlist_attr == null) ) {
self._list_append( local_list_mappings, pred, value_obj );
}
else {
self.emit( new_subject, pred, value_obj );
}
}
}
// Step 12: complete the parent's incomplete triples
if ( not skip and not (new_subject == null) ) {
for ( let it in ctx.get_incomplete() ) {
if ( it.get_direction() eq "forward" ) {
self.emit( ctx.get_parent_subject(), it.get_predicate(), new_subject );
}
else if ( it.get_direction() eq "reverse" ) {
self.emit( new_subject, it.get_predicate(), ctx.get_parent_subject() );
}
else {
// "list": the subject joins the shared item array.
it.get_list().push(new_subject);
}
}
}
// Role Attribute 1.0
let role_attr := host_obj.get_attr( node, "role" );
if ( not (role_attr == null) ) {
// The subject is the element's own "#id" IRI when it has an @id,
// otherwise a fresh blank node. Role terms default to the XHTML
// vocabulary.
let id_attr := host_obj.get_attr( node, "id" );
let role_subject := not (id_attr == null) ?
rdf_iri(expander.resolve_iri( "#" _ id_attr )) :
self.new_bnode();
let role_expander := expander.with_vocab(XHV_NS);
for ( let token in _rdfa_tokens(role_attr) ) {
let expanded := role_expander.expand_term_or_curie_or_absiri( token, false );
let term := self._predicate_term(expanded);
next if term == null;
self.emit( role_subject, rdf_iri(XHV_NS _ "role"), term );
}
}
// Step 13: recurse
let child_ctx := null;
if ( skip ) {
// Pass the parent's subject, object and incomplete triples through;
// only the expander, language, namespaces and list state are ours.
child_ctx := new RdfaContext(
parent_subject: ctx.get_parent_subject(),
parent_object: ctx.get_parent_object(),
incomplete: ctx.get_incomplete(),
list_mappings: local_list_mappings,
list_subject: list_owner,
ns_decls: ns_decls,
expander: expander,
lang: lang,
);
}
else {
let child_parent_subject := not (new_subject == null) ?
new_subject : ctx.get_parent_subject();
// Children see the object resource if there is one, otherwise the
// subject, otherwise the inherited parent subject.
let child_parent_object := not (current_object_resource == null) ?
current_object_resource :
( not (new_subject == null) ? new_subject : ctx.get_parent_subject() );
child_ctx := new RdfaContext(
parent_subject: child_parent_subject,
parent_object: child_parent_object,
incomplete: local_incomplete,
list_mappings: local_list_mappings,
list_subject: list_owner,
ns_decls: ns_decls,
expander: expander,
lang: lang,
);
}
for ( let child in node.childNodes() ) {
next unless child.nodeKind() eq "element";
self.process_element( child, child_ctx );
}
// Step 14: emit list triples for mappings created here
if ( created_list_mappings ) {
self._emit_lists( new_subject, local_list_mappings );
}
}
// Keep only the tokens of a @rel/@rev value that contain a colon (that
// is, CURIEs and IRIs), re-joined with single spaces. Returns null when
// `value` is null or no token survives, so the caller can treat the
// attribute as absent.
method _strip_term_tokens ( value ) {
    if ( value == null ) {
        return null;
    }
    let survivors := [];
    for ( let token in _rdfa_tokens(value) ) {
        if ( contains( token, ":" ) ) {
            survivors.push(token);
        }
    }
    return survivors.length() == 0 ? null : join( " ", survivors );
}
// Return the item array for `pred` in `mappings`, creating an empty
// { predicate, items } entry first if the predicate has none yet. The
// array is the stored one, so pushes by the caller land in the mapping.
method _list_for ( Dict mappings, pred ) {
    let slot := rdf_term_key(pred);
    if ( mappings.exists(slot) ) {
        return mappings.get(slot){items};
    }
    mappings.set( slot, { predicate: pred, items: [] } );
    return mappings.get(slot){items};
}
// Append `item` to the list kept for `pred` in `mappings`.
method _list_append ( Dict mappings, pred, item ) {
    let items := self._list_for( mappings, pred );
    items.push(item);
}
// Emit each list in `mappings` as an RDF collection hanging off
// `subject`. An empty list becomes (subject, predicate, rdf:nil). For a
// non-empty list, one blank node per item is allocated up front, then
// the rdf:first/rdf:rest pair for each cell is emitted in order, and
// finally the link from the subject to the head cell.
method _emit_lists ( subject, Dict mappings ) {
    for ( let key in mappings.keys() ) {
        let entry := mappings.get(key);
        let pred := entry{predicate};
        let members := entry{items};
        let count := members.length();
        if ( count == 0 ) {
            self.emit( subject, pred, rdf_iri(RDF_NS _ "nil") );
        }
        else {
            let cells := [];
            let made := 0;
            while ( made < count ) {
                cells.push(self.new_bnode());
                made++;
            }
            let at := 0;
            while ( at < count ) {
                let cell := cells[at];
                self.emit( cell, rdf_iri(RDF_NS _ "first"), members[at] );
                // The last cell terminates the chain with rdf:nil.
                let tail := at + 1 < count ?
                    cells[at + 1] : rdf_iri(RDF_NS _ "nil");
                self.emit( cell, rdf_iri(RDF_NS _ "rest"), tail );
                at++;
            }
            self.emit( subject, pred, cells[0] );
        }
    }
}
// HTML+RDFa property copying (rdfa:copy / rdfa:Pattern). Rewrites
// `quads` in place: the properties of every referenced pattern are
// copied onto the subjects that rdfa:copy it, after which the rdfa:copy
// triples and the referenced patterns' own triples are removed. Does
// nothing when the document declares no rdfa:Pattern.
method apply_patterns () {
    let copy_key := rdf_term_key(rdf_iri(RDFA_NS _ "copy"));
    let type_key := rdf_term_key(rdf_iri(RDF_NS _ "type"));
    let pattern_class := rdf_term_key(rdf_iri(RDFA_NS _ "Pattern"));

    // Pass 1: which subjects are typed rdfa:Pattern?
    let is_pattern := {};
    for ( let quad in quads ) {
        next unless rdf_term_key(quad.get_predicate()) eq type_key;
        next unless rdf_term_key(quad.get_object()) eq pattern_class;
        is_pattern.set( rdf_term_key(quad.get_subject()), true );
    }
    if ( is_pattern.keys().length() == 0 ) {
        return null;
    }

    // Pass 2: collect each pattern's triples, leaving out the
    // rdf:type rdfa:Pattern declaration itself.
    let props_of := {};
    for ( let quad in quads ) {
        let owner := rdf_term_key(quad.get_subject());
        next unless is_pattern.exists(owner);
        if ( rdf_term_key(quad.get_predicate()) eq type_key ) {
            next if rdf_term_key(quad.get_object()) eq pattern_class;
        }
        if ( not props_of.exists(owner) ) {
            props_of.set( owner, [] );
        }
        props_of.get(owner).push(quad);
    }

    // Pass 3: for every rdfa:copy on a non-pattern subject, copy the
    // target pattern's properties (following nested rdfa:copy) and note
    // which patterns were actually used.
    let consumed := {};
    let additions := [];
    for ( let quad in quads ) {
        next if is_pattern.exists(rdf_term_key(quad.get_subject()));
        next unless rdf_term_key(quad.get_predicate()) eq copy_key;
        self._copy_pattern_into(
            additions, quad.get_subject(), rdf_term_key(quad.get_object()),
            props_of, copy_key, {}, consumed,
        );
    }

    // Pass 4: drop the triples of consumed patterns and the rdfa:copy
    // triples of non-pattern subjects; patterns nobody referenced
    // survive intact. The copies go after the surviving originals.
    let kept := [];
    for ( let quad in quads ) {
        let owner := rdf_term_key(quad.get_subject());
        next if consumed.exists(owner);
        if ( rdf_term_key(quad.get_predicate()) eq copy_key ) {
            next unless is_pattern.exists(owner);
        }
        kept.push(quad);
    }
    for ( let quad in additions ) {
        kept.push(quad);
    }
    quads := kept;
}
// Appends to `out` a copy of every property of the pattern identified
// by `pattern_key`, re-rooted on `subject`.
//
//   out           - array receiving the newly built quads
//   subject       - term that becomes the subject of each copy
//   pattern_key   - rdf_term_key of the pattern to copy from
//   pattern_props - dict: pattern key -> array of that pattern's quads
//   copy_pred     - rdf_term_key of the rdfa:copy predicate
//   seen          - pattern keys already visited in this traversal;
//                   stops cycles and repeated copies
//   referenced    - dict updated with each pattern key found in
//                   pattern_props
//
// A pattern quad whose predicate is rdfa:copy is not copied itself;
// its object is followed recursively instead.
method _copy_pattern_into ( Array out, subject, String pattern_key,
    Dict pattern_props, String copy_pred, Dict seen, Dict referenced ) {
    if ( seen.exists(pattern_key) ) {
        return null;
    }
    // Mark as visited before looking the key up, so even an unknown
    // target is only examined once per traversal.
    seen.set( pattern_key, true );
    if ( not pattern_props.exists(pattern_key) ) {
        return null;
    }
    referenced.set( pattern_key, true );
    for ( let prop in pattern_props.get(pattern_key) ) {
        let pred := prop.get_predicate();
        let target := prop.get_object();
        if ( rdf_term_key(pred) eq copy_pred ) {
            // Nested rdfa:copy: pull in the nested pattern instead.
            self._copy_pattern_into(
                out, subject, rdf_term_key(target),
                pattern_props, copy_pred, seen, referenced,
            );
        }
        else {
            out.push(rdf_quad( subject, pred, target ));
        }
    }
}
// RDFa vocabulary expansion (RDFa Core 1.1 section 10).
//
// For every IRI in `vocabularies_used`, the vocabulary is loaded with
// `vocab_loader` (falling back to _default_vocab_loader when that is
// null) and its rdfs:subPropertyOf, rdfs:subClassOf,
// owl:equivalentProperty and owl:equivalentClass statements are
// collected as rules; the two owl: predicates are recorded in both
// directions. The rules are then applied to `quads` until no new
// quad can be derived, appending the derived quads to `quads`.
//
// A loader result of null is skipped. Returns null immediately when
// no vocabulary was used.
method expand_vocabularies () {
    return null if vocabularies_used.keys().length() == 0;
    let loader := vocab_loader;
    loader := fn ( String iri ) -> self._default_vocab_loader(iri)
        if loader == null;
    // The rule predicates are constants: build their keys once
    // rather than once per vocabulary quad.
    let sub_prop_key := rdf_term_key(rdf_iri("http://www.w3.org/2000/01/rdf-schema#subPropertyOf"));
    let sub_class_key := rdf_term_key(rdf_iri("http://www.w3.org/2000/01/rdf-schema#subClassOf"));
    let eq_prop_key := rdf_term_key(rdf_iri("http://www.w3.org/2002/07/owl#equivalentProperty"));
    let eq_class_key := rdf_term_key(rdf_iri("http://www.w3.org/2002/07/owl#equivalentClass"));
    let sub_prop := {};
    let sub_class := {};
    for ( let vocab_iri in vocabularies_used.keys() ) {
        let vocab_quads := loader(vocab_iri);
        next if vocab_quads == null;
        for ( let q in vocab_quads ) {
            let p := rdf_term_key(q.get_predicate());
            let s := q.get_subject();
            let o := q.get_object();
            if ( p eq sub_prop_key ) {
                self._rule_add( sub_prop, s, o );
            }
            else if ( p eq sub_class_key ) {
                self._rule_add( sub_class, s, o );
            }
            else if ( p eq eq_prop_key ) {
                self._rule_add( sub_prop, s, o );
                self._rule_add( sub_prop, o, s );
            }
            else if ( p eq eq_class_key ) {
                self._rule_add( sub_class, s, o );
                self._rule_add( sub_class, o, s );
            }
        }
    }
    // Materialise to a fixpoint.
    let type_key := rdf_term_key(rdf_iri(RDF_NS _ "type"));
    // Index of every triple already present. It is built once and
    // kept up to date as quads are derived, instead of being rebuilt
    // from scratch on every pass.
    let seen := {};
    for ( let q in quads ) {
        seen.set( rdf_term_key(q.get_subject()) _ " " _
            rdf_term_key(q.get_predicate()) _ " " _
            rdf_term_key(q.get_object()), true );
    }
    // The rules are fixed, so a quad that has been processed once can
    // never yield anything new later: each pass only needs to look at
    // the quads derived by the previous pass.
    let frontier := quads;
    while ( frontier.length() > 0 ) {
        let additions := [];
        for ( let q in frontier ) {
            let skey := rdf_term_key(q.get_subject());
            let pkey := rdf_term_key(q.get_predicate());
            let okey := rdf_term_key(q.get_object());
            if ( sub_prop.exists(pkey) ) {
                for ( let super_term in sub_prop.get(pkey){supers} ) {
                    let ckey := skey _ " " _
                        rdf_term_key(super_term) _ " " _ okey;
                    if ( not seen.exists(ckey) ) {
                        seen.set( ckey, true );
                        additions.push(rdf_quad(
                            q.get_subject(), super_term, q.get_object(),
                        ));
                    }
                }
            }
            if ( pkey eq type_key and sub_class.exists(okey) ) {
                for ( let super_term in sub_class.get(okey){supers} ) {
                    let ckey := skey _ " " _
                        type_key _ " " _ rdf_term_key(super_term);
                    if ( not seen.exists(ckey) ) {
                        seen.set( ckey, true );
                        additions.push(rdf_quad(
                            q.get_subject(), rdf_iri(RDF_NS _ "type"), super_term,
                        ));
                    }
                }
            }
        }
        // Append only after the scan, so the array being iterated is
        // never modified mid-loop.
        for ( let a in additions ) {
            quads.push(a);
        }
        frontier := additions;
    }
}
// Records `super_term` as a super-term of `sub` in `table`.
//
// `table` maps rdf_term_key(sub) to a dict whose `supers` array holds
// the super-terms in insertion order; the entry is created on first
// use. Duplicates are not filtered here.
method _rule_add ( Dict table, sub, super_term ) {
    let sub_key := rdf_term_key(sub);
    if ( not table.exists(sub_key) ) {
        table.set( sub_key, { supers: [] } );
    }
    let rule := table.get(sub_key);
    rule{supers}.push(super_term);
}
// Default vocabulary loader: fetches `iri` over HTTP and parses the
// response body into quads, using `iri` as the base.
//
// The request advertises Turtle, with RDF/XML as a lower-priority
// alternative. The response is parsed as RDF/XML when its
// Content-Type contains "rdf+xml", and as Turtle otherwise.
// expect_success() is called on the response, so a failed request is
// presumably raised as an error rather than returned (confirm against
// std/net/http).
method _default_vocab_loader ( String iri ) {
    from std/net/http import UserAgent;
    from rdf/parser/turtle import TurtleParser;
    from rdf/parser/rdfxml import RdfXmlParser;
    let agent := new UserAgent(
        default_headers: { Accept: "text/turtle, application/rdf+xml;q=0.9" },
    );
    let response := agent.get(iri).expect_success();
    // Concatenating onto "" stringifies the header value before it
    // is lower-cased.
    let media_type := lc("" _ response.header("content-type"));
    let parser := null;
    if ( contains( media_type, "rdf+xml" ) ) {
        parser := new RdfXmlParser();
    }
    else {
        parser := new TurtleParser();
    }
    return parser.parse_string( response.text(), base: iri );
}
}
// Options shared by the RDFa parser classes.
//
//   base            - base IRI as a string (default "")
//   into            - optional target receiving the parsed quads
//                     through add_quads (default null)
//   vocab_expansion - whether vocabulary expansion is requested
//                     (default false)
//   vocab_loader    - optional custom vocabulary loader (default null)
class RdfaParserOptions {
    let String base with get := "";
    let into with get := null;
    let Boolean vocab_expansion with get := false;
    let vocab_loader with get := null;

    // Builds an options object from the named arguments handed to a
    // parser. `base` is stringified and `vocab_expansion` is reduced
    // to a strict boolean; any option name other than the four above
    // is fatal.
    static method from_pairs ( PairList options ) {
        let base_iri := "";
        let target := null;
        let expand := false;
        let loader := null;
        for ( let option in options.to_Array() ) {
            let name := option.key;
            if ( name eq "base" ) {
                base_iri := "" _ option.value;
            }
            else if ( name eq "into" ) {
                target := option.value;
            }
            else if ( name eq "vocab_expansion" ) {
                expand := option.value ? true : false;
            }
            else if ( name eq "vocab_loader" ) {
                loader := option.value;
            }
            else {
                die "rdfa parser: unsupported option '" _ name _ "'";
            }
        }
        return new RdfaParserOptions(
            base: base_iri,
            into: target,
            vocab_expansion: expand,
            vocab_loader: loader,
        );
    }

    // Delivers the parsed quads: without an `into` target the array
    // is returned as is; otherwise the quads are added to the target
    // and the target is returned.
    method result ( Array quads ) {
        return quads if into == null;
        into.add_quads(quads);
        return into;
    }
}
// RDFa parser for generic XML documents.
class RdfaCoreParser with RdfParser {

    // Returns the host-language object handed to the processor; here
    // a plain RdfaHost. Presumably the hook that host-language
    // parsers override (confirm against html_rdfa / xhtml_rdfa).
    method host () {
        return new RdfaHost();
    }

    // Parses `text` as XML and runs the RDFa processor over its
    // document element.
    //
    // Accepts the named options understood by
    // RdfaParserOptions.from_pairs. Options are validated before the
    // document is parsed. Returns whatever RdfaParserOptions.result
    // yields for the produced quads.
    method parse_string ( String text, ... PairList options ) {
        from std/data/xml import XML;
        let settings := RdfaParserOptions.from_pairs(options);
        let document := XML.parse(text);
        let processor := new RdfaProcessor(
            host: self.host(),
            vocab_expansion: settings.get_vocab_expansion(),
            vocab_loader: settings.get_vocab_loader(),
        );
        return settings.result(
            processor.run( document.documentElement(), settings.get_base() )
        );
    }
}
modules/rdf/parser/rdfa_core.zzm
rdf-rdfa-0.0.1 source code
Package
- Name
- rdf-rdfa
- Version
- 0.0.1
- Uploaded
- 2026-06-13 00:17:04
- Repository
- https://github.com/tobyink/zuzu-rdf-rdfa
- Dependencies
- html/parser >= 0
- rdf >= 0
- std/data/xml >= 0
- std/io >= 0
- std/string >= 0
-
- Metadata
- zuzu-distribution.json
- Archive
- Download .tar.gz