modules/rdf/parser/rdfa_core.zzm

rdf-rdfa-0.0.1 source code

Package

Name
rdf-rdfa
Version
0.0.1
Uploaded
2026-06-13 00:17:04
Repository
https://github.com/tobyink/zuzu-rdf-rdfa
Dependencies
Metadata
zuzu-distribution.json
Archive
Download .tar.gz
=encoding utf8

=head1 NAME

rdf/parser/rdfa_core - RDFa Core 1.1 processor.

=head1 SYNOPSIS

  from rdf/parser/rdfa_core import RdfaCoreParser, CurieExpander;

  let parser := new RdfaCoreParser();
  let quads := parser.parse_string(
    "<doc xmlns:dc=\"http://purl.org/dc/terms/\" " _
    "xmlns=\"http://example.com/\" " _
    "about=\"http://example.com/book\" property=\"dc:title\">Moby Dick</doc>",
    base: "http://example.com/",
  );

  let expander := new CurieExpander();
  say( expander.expand("foaf:name") );   // http://xmlns.com/foaf/0.1/name

=head1 DESCRIPTION

This module implements the RDFa Core 1.1 processing model over a
DOM-like document tree. C<RdfaCoreParser> parses RDFa in generic XML
documents using C<std/data/xml>, with no host-language-specific
behaviour beyond C<xml:lang>, C<xml:base>, and C<xmlns:*> prefix
declarations.

The HTML and XHTML host languages are provided by
L<rdf/parser/html_rdfa> and L<rdf/parser/xhtml_rdfa>, which build on the
classes exported here.

=head1 EXPORTS

=head2 Classes

=over

=item C<CurieExpander>

Expands CURIEs, SafeCURIEs, and terms to full IRIs, seeded with the
RDFa 1.1 initial context. Construct with optional named arguments
C<iri_mappings>, C<term_mappings>, C<rel_rev_terms>, C<default_vocab>,
and C<base>.

=over

=item C<< expand(String value) -> String? >>

Expands a CURIE, SafeCURIE, or IRI to an absolute IRI string, resolving
relative IRIs against the base. Returns null when the value should be
ignored (e.g. an unresolvable SafeCURIE). Blank node identifiers are
returned unchanged.

=item C<< expand_curie(String value) -> String? >>

Expands a bare CURIE (prefix:reference). Returns null unless the prefix
has a mapping.

=item C<< expand_curie_or_iri(String value) -> String? >>

The value space of C<@about> and C<@resource>.

=item C<< expand_term_or_curie_or_absiri(String value, Boolean rel_rev := false) -> String? >>

The value space of C<@rel>, C<@rev>, C<@property>, C<@typeof>, and
C<@datatype>. For terms, the default vocabulary takes precedence when
one is set; otherwise the term mappings are consulted (exact match,
then case-insensitive), then the host rel/rev terms when C<rel_rev> is
true. Relative IRIs are not resolved in this value space.

=item C<< resolve_iri(String value) -> String >>

Resolves an IRI reference against the base.

=item C<< with_prefixes(Dict extra) -> CurieExpander >>

=item C<< with_vocab(vocab) -> CurieExpander >>

=item C<< with_base(String base) -> CurieExpander >>

Return derived expanders; the original is unchanged.

=back

=item C<RdfaCoreParser>

An C<RdfParser> for RDFa in generic XML. Accepts the standard C<base>
and C<into> parser options plus C<vocab_expansion> (Boolean) and
C<vocab_loader> (a function from vocabulary IRI to an array of quads).

=item C<RdfaHost>

Host-language abstraction for generic XML. Subclassed by the HTML and
XHTML host languages.

=item C<RdfaProcessor>

The RDFa Core 1.1 processing engine. Normally used via the parser
classes.

=item C<RdfaContext>

The evaluation context threaded through element processing.

=item C<RdfaIncompleteTriple>

An incomplete triple pending completion by a descendant element.

=back

=head2 Constants

C<RDFA_NS>, C<XHV_NS>, C<RDFA_INITIAL_PREFIXES>, and
C<RDFA_INITIAL_TERMS>.

=head1 COPYRIGHT AND LICENCE

B<< rdf/parser/rdfa_core >> is copyright Toby Inkster.

It is free software; you may redistribute it and/or modify it under
the terms of either the Artistic License 1.0 or the GNU General Public
License version 2.

=cut

from rdf/parser import RdfParser;
from rdf/parser/common import RDFReader, _parser_result;
from rdf/term import
	RDFBlank,
	RDFLiteral,
	RDF_NS,
	XSD_NS,
	rdf_blank,
	rdf_iri,
	rdf_literal,
	rdf_quad,
	rdf_term_key;
from rdf/graph import rdf_quads_unique;
from rdf/ns import XML_NS, XMLNS_NS;
from std/string import contains, ends_with, index, join, replace, split,
	starts_with, substr, trim;

// Namespace IRI of the RDFa vocabulary; rdfa:usesVocabulary, rdfa:copy
// and rdfa:Pattern are built from it in this module.
const RDFA_NS := "http://www.w3.org/ns/rdfa#";
// Namespace IRI of the XHTML vocabulary; used for the role predicate and
// as the vocabulary for bare @role terms.
const XHV_NS := "http://www.w3.org/1999/xhtml/vocab#";

// Prefix mappings of the RDFa 1.1 initial context, keyed by lowercase
// prefix name. Source: https://www.w3.org/2011/rdfa-context/rdfa-1.1
// The empty prefix is the default prefix mapping (xhv).
// CurieExpander copies this dict for its default iri_mappings, so the
// constant itself is never mutated by expanders.
const RDFA_INITIAL_PREFIXES := {
	"":        "http://www.w3.org/1999/xhtml/vocab#",
	"as":      "https://www.w3.org/ns/activitystreams#",
	"cc":      "http://creativecommons.org/ns#",
	"csvw":    "http://www.w3.org/ns/csvw#",
	"ctag":    "http://commontag.org/ns#",
	"dc":      "http://purl.org/dc/terms/",
	"dc11":    "http://purl.org/dc/elements/1.1/",
	"dcat":    "http://www.w3.org/ns/dcat#",
	"dcterms": "http://purl.org/dc/terms/",
	"dqv":     "http://www.w3.org/ns/dqv#",
	"duv":     "https://www.w3.org/ns/duv#",
	"foaf":    "http://xmlns.com/foaf/0.1/",
	"gr":      "http://purl.org/goodrelations/v1#",
	"grddl":   "http://www.w3.org/2003/g/data-view#",
	"ical":    "http://www.w3.org/2002/12/cal/icaltzd#",
	// Listed in the published initial context but previously absent here.
	"jsonld":  "http://www.w3.org/ns/json-ld#",
	"ldp":     "http://www.w3.org/ns/ldp#",
	"ma":      "http://www.w3.org/ns/ma-ont#",
	"oa":      "http://www.w3.org/ns/oa#",
	"odrl":    "http://www.w3.org/ns/odrl/2/",
	"og":      "http://ogp.me/ns#",
	"org":     "http://www.w3.org/ns/org#",
	"owl":     "http://www.w3.org/2002/07/owl#",
	"prov":    "http://www.w3.org/ns/prov#",
	"qb":      "http://purl.org/linked-data/cube#",
	"rdf":     "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
	"rdfa":    "http://www.w3.org/ns/rdfa#",
	"rdfs":    "http://www.w3.org/2000/01/rdf-schema#",
	"rev":     "http://purl.org/stuff/rev#",
	"rif":     "http://www.w3.org/2007/rif#",
	"rr":      "http://www.w3.org/ns/r2rml#",
	"schema":  "http://schema.org/",
	"sd":      "http://www.w3.org/ns/sparql-service-description#",
	"sioc":    "http://rdfs.org/sioc/ns#",
	"skos":    "http://www.w3.org/2004/02/skos/core#",
	"skosxl":  "http://www.w3.org/2008/05/skos-xl#",
	"sosa":    "http://www.w3.org/ns/sosa/",
	"ssn":     "http://www.w3.org/ns/ssn/",
	"time":    "http://www.w3.org/2006/time#",
	"v":       "http://rdf.data-vocabulary.org/#",
	"vcard":   "http://www.w3.org/2006/vcard/ns#",
	"void":    "http://rdfs.org/ns/void#",
	"wdr":     "http://www.w3.org/2007/05/powder#",
	"wdrs":    "http://www.w3.org/2007/05/powder-s#",
	"xhv":     "http://www.w3.org/1999/xhtml/vocab#",
	"xml":     "http://www.w3.org/XML/1998/namespace",
	"xsd":     "http://www.w3.org/2001/XMLSchema#",
};

// Term mappings available without any declaration: bare term -> IRI.
// CurieExpander copies this dict for its default term_mappings, and
// RdfaHost.terms() returns it as the generic-XML host's term set.
const RDFA_INITIAL_TERMS := {
	"describedby": "http://www.w3.org/2007/05/powder-s#describedby",
	"license":     "http://www.w3.org/1999/xhtml/vocab#license",
	"role":        "http://www.w3.org/1999/xhtml/vocab#role",
};

// Escape "&", "<" and ">" for use as XML character data. The ampersand
// is handled first so the entities produced by the later passes are not
// themselves re-escaped.
function _rdfa_escape_text ( String value ) {
	let amp_done := replace( value, "&", "&amp;", "g" );
	let lt_done := replace( amp_done, "<", "&lt;", "g" );
	return replace( lt_done, ">", "&gt;", "g" );
}

// Escape a value for a double-quoted XML attribute: the text escapes
// plus the double quote itself.
function _rdfa_escape_attr ( String value ) {
	let escaped := _rdfa_escape_text(value);
	return replace( escaped, "\"", "&quot;", "g" );
}

// Resolve an IRI reference against a base IRI. With an empty base the
// value is handed back untouched; otherwise resolution is delegated to
// a throwaway RDFReader configured with that base.
function _rdfa_resolve_against ( String base, String value ) {
	if ( base eq "" ) {
		return value;
	}
	let reader := new RDFReader(source: "");
	return reader.set_base(base).resolve_iri(value);
}

// Drop the fragment part of an IRI: everything from the first "#"
// onwards. IRIs without a "#" are returned as they are.
function _rdfa_strip_fragment ( String iri ) {
	if ( contains( iri, "#" ) ) {
		let parts := split( iri, "#", 2 );
		return parts[0];
	}
	return iri;
}

// Break an attribute value into its whitespace-separated tokens. The
// value is trimmed first, and any empty token (as produced by an empty
// or all-whitespace value) is discarded.
function _rdfa_tokens ( String value ) {
	let tokens := [];
	let pieces := split( trim(value), /\s+/ );
	for ( let piece in pieces ) {
		next if piece eq "";
		tokens.push(piece);
	}
	return tokens;
}

// Syntactic test for "starts with an IRI scheme": a letter, then any
// letters, digits, "+", "." or "-", then ":". The result is whatever the
// match operator yields. Because the check is purely lexical, any
// "prefix:reference" string matches, whether or not the prefix is mapped.
function _rdfa_looks_absolute ( String value ) {
	return value ~ /^[A-Za-z][A-Za-z0-9+.\-]*:/;
}

// Expands CURIEs, SafeCURIEs and terms to IRI strings. The with_*
// methods build a new expander instead of altering this one.
class CurieExpander {
	let Dict iri_mappings with get := RDFA_INITIAL_PREFIXES.copy();
	let Dict term_mappings with get := RDFA_INITIAL_TERMS.copy();
	let Dict rel_rev_terms with get := {};
	let default_vocab with get := null;
	let String base with get := "";

	// Derived expander with the extra prefix mappings layered over the
	// current ones (prefix names are lowercased). An empty dict yields
	// this expander itself.
	method with_prefixes ( Dict extra ) {
		let names := extra.keys();
		if ( names.length() == 0 ) {
			return self;
		}
		let combined := iri_mappings.copy();
		for ( let name in names ) {
			combined.set( lc(name), extra.get(name) );
		}
		return new CurieExpander(
			iri_mappings: combined,
			term_mappings: term_mappings,
			rel_rev_terms: rel_rev_terms,
			default_vocab: default_vocab,
			base: base,
		);
	}

	// Derived expander that differs only in its default vocabulary
	// (null clears it).
	method with_vocab ( vocab ) {
		return new CurieExpander(
			iri_mappings: iri_mappings,
			term_mappings: term_mappings,
			rel_rev_terms: rel_rev_terms,
			default_vocab: vocab,
			base: base,
		);
	}

	// Derived expander that differs only in its base IRI.
	method with_base ( String new_base ) {
		return new CurieExpander(
			iri_mappings: iri_mappings,
			term_mappings: term_mappings,
			rel_rev_terms: rel_rev_terms,
			default_vocab: default_vocab,
			base: new_base,
		);
	}

	// Resolve an IRI reference against this expander's base.
	method resolve_iri ( String value ) {
		return _rdfa_resolve_against( base, value );
	}

	// Expand "prefix:reference". Null when there is no colon, when the
	// prefix is "_" (blank node syntax), or when the lowercased prefix
	// has no mapping.
	method expand_curie ( String value ) {
		// index/substr rather than a limit-2 split: zuzu-js truncates
		// the tail, breaking references that contain ":" themselves.
		let at := index( value, ":" );
		if ( at < 0 ) {
			return null;
		}
		let prefix := lc(substr( value, 0, at ));
		if ( prefix eq "_" ) {
			return null;
		}
		if ( not iri_mappings.exists(prefix) ) {
			return null;
		}
		let reference := substr( value, at + 1 );
		return iri_mappings.get(prefix) _ reference;
	}

	// Value space of @about/@resource: SafeCURIE, CURIE or IRI.
	method expand_curie_or_iri ( String value ) {
		if ( starts_with( value, "[" ) and ends_with( value, "]" ) ) {
			// SafeCURIE: never falls back to IRI resolution.
			let body := substr( value, 1, length value - 2 );
			if ( body eq "" ) {
				return null;
			}
			if ( starts_with( body, "_:" ) ) {
				return body;
			}
			return self.expand_curie(body);
		}
		if ( starts_with( value, "_:" ) ) {
			return value;
		}
		let as_curie := self.expand_curie(value);
		if ( as_curie == null ) {
			return self.resolve_iri(value);
		}
		return as_curie;
	}

	// Value space of @rel/@rev/@property/@typeof/@datatype: term, CURIE
	// or absolute IRI. Returns null for anything that is none of those.
	method expand_term_or_curie_or_absiri ( String value, Boolean rel_rev := false ) {
		if ( value eq "" ) {
			return null;
		}
		if ( contains( value, ":" ) ) {
			if ( starts_with( value, "_:" ) ) {
				return value;
			}
			let as_curie := self.expand_curie(value);
			if ( not (as_curie == null) ) {
				return as_curie;
			}
			if ( _rdfa_looks_absolute(value) ) {
				return value;
			}
			return null;
		}
		// Term (RDFa Core 1.1 section 7.4.3): the local default
		// vocabulary wins, then term mappings (exact match, then
		// case-insensitive), then the host rel/rev terms.
		if ( not (default_vocab == null) ) {
			return default_vocab _ value;
		}
		if ( term_mappings.exists(value) ) {
			return term_mappings.get(value);
		}
		let folded := lc(value);
		for ( let known in term_mappings.keys() ) {
			if ( lc(known) eq folded ) {
				return term_mappings.get(known);
			}
		}
		if ( rel_rev and rel_rev_terms.exists(folded) ) {
			return rel_rev_terms.get(folded);
		}
		return null;
	}

	// Convenience alias for expand_curie_or_iri.
	method expand ( String value ) {
		return self.expand_curie_or_iri(value);
	}
}

// A triple recorded by an element that has @rel/@rev predicates but no
// object resource of its own; a descendant that establishes a subject
// completes it.
class RdfaIncompleteTriple {
	// Predicate term of the pending triple.
	let predicate with get := null;
	let String direction with get := "";   // "forward", "reverse", or "list"
	let list with get := null;             // for "list": shared array of items
}

// Evaluation context handed from an element to its children while the
// document tree is processed.
class RdfaContext {
	// Subject in effect for the parent element.
	let parent_subject with get := null;
	// Object resource established by the parent; left null in the
	// initial context created by RdfaProcessor.run.
	let parent_object with get := null;
	// RdfaIncompleteTriple records waiting for a descendant's subject.
	let Array incomplete with get := [];
	// Pending list items, keyed by rdf_term_key of the predicate; each
	// entry is { predicate, items }.
	let Dict list_mappings with get := {};
	// The subject the inherited list mapping attaches to; a fresh
	// mapping is needed when the effective subject changes.
	let list_subject with get := null;
	// Document-declared namespaces in scope ("xmlns" or "xmlns:p" ->
	// IRI), preserved when serializing XMLLiterals.
	let Dict ns_decls with get := {};
	// CurieExpander carrying the prefixes, vocabulary and base in scope.
	let expander with get := null;
	// Language tag in scope; "" when none has been declared.
	let String lang with get := "";
}

// Host-language hooks for generic XML documents (std/data/xml DOM).
// The HTML and XHTML host languages subclass this.
class RdfaHost {
	// Normalize the element's attributes to { ns, local, name, value }
	// records. The namespace is worked out from the qualified name
	// because the three runtimes' std/data/xml implementations disagree
	// about namespaceURI() on attribute nodes; namespaceURI() is only
	// consulted for prefixes other than "xml" and "xmlns".
	method attr_records ( node ) {
		let records := [];
		for ( let attr in node.attributes() ) {
			let qname := attr.nodeName();
			let ns := "";
			let local := qname;
			if ( qname eq "xmlns" ) {
				ns := XMLNS_NS;
			}
			else if ( contains( qname, ":" ) ) {
				let cut := index( qname, ":" );
				let qprefix := substr( qname, 0, cut );
				local := substr( qname, cut + 1 );
				if ( qprefix eq "xml" ) {
					ns := XML_NS;
				}
				else if ( qprefix eq "xmlns" ) {
					ns := XMLNS_NS;
				}
				else {
					let reported := attr.namespaceURI();
					if ( not (reported == null) ) {
						ns := "" _ reported;
					}
				}
			}
			records.push({ ns: ns, local: local, name: qname, value: attr.nodeValue() });
		}
		return records;
	}

	// Value of the un-namespaced attribute with this exact name, or
	// null when the element does not carry it.
	method get_attr ( node, String name ) {
		for ( let entry in self.attr_records(node) ) {
			next unless entry{ns} eq "";
			return entry{value} if entry{name} eq name;
		}
		return null;
	}

	// Value of xml:lang on this element, or null.
	method get_lang ( node ) {
		for ( let entry in self.attr_records(node) ) {
			if ( entry{ns} eq XML_NS and entry{local} eq "lang" ) {
				return entry{value};
			}
		}
		return null;
	}

	// Prefix declarations made on this element via xmlns:*, returned
	// as lowercase prefix -> IRI.
	method scan_prefixes ( node ) {
		let found := {};
		for ( let entry in self.attr_records(node) ) {
			let prefix := null;
			if ( starts_with( entry{name}, "xmlns:" ) ) {
				prefix := substr( entry{name}, 6 );
			}
			else if ( entry{ns} eq XMLNS_NS and entry{local} ne "xmlns" ) {
				prefix := entry{local};
			}
			next if prefix == null;
			found.set( lc(prefix), entry{value} );
		}
		return found;
	}

	// Per-element base adjustment: generic XML honours xml:base. The
	// attribute value is resolved against the current base and any
	// fragment is removed; without xml:base the current base is kept.
	method element_base ( node, String current ) {
		for ( let entry in self.attr_records(node) ) {
			next unless entry{ns} eq XML_NS and entry{local} eq "base";
			let resolved := _rdfa_resolve_against( current, entry{value} );
			return _rdfa_strip_fragment(resolved);
		}
		return current;
	}

	// Document-level base (e.g. <base href> in (X)HTML). Generic XML
	// has none, so the supplied default is used as is.
	method doc_base ( root, String default_base ) {
		return default_base;
	}

	// Text used for plain literal values.
	method text_content ( node ) {
		return node.textContent();
	}

	// The default XML namespace declared on this element, if any.
	method default_xmlns ( node ) {
		for ( let entry in self.attr_records(node) ) {
			if ( entry{name} eq "xmlns" ) {
				return entry{value};
			}
		}
		return null;
	}

	// Serialize child nodes for an XMLLiteral. In-scope namespace
	// declarations (ns_decls) are injected into top-level elements so
	// the literal stays self-contained.
	method serialize_children ( node, Dict ns_decls := {} ) {
		let pieces := "";
		for ( let child in node.childNodes() ) {
			pieces := pieces _ self._serialize_node( child, ns_decls );
		}
		return pieces;
	}

	// Serialize one node. Text is escaped, comments are copied through,
	// elements are written with their own attributes followed by any
	// injected declarations they do not already carry (in sorted
	// order); every other node kind produces "". Nested elements get
	// no injected declarations.
	method _serialize_node ( node, Dict inject ) {
		let kind := node.nodeKind();
		if ( kind eq "text" ) {
			return _rdfa_escape_text(node.textContent());
		}
		if ( kind eq "comment" ) {
			return "<!--" _ node.textContent() _ "-->";
		}
		if ( kind ne "element" ) {
			return "";
		}
		let tag := node.nodeName();
		let markup := "<" _ tag;
		let present := {};
		for ( let entry in self.attr_records(node) ) {
			present.set( entry{name}, true );
			markup _= " " _ entry{name} _ "=\"" _ _rdfa_escape_attr(entry{value}) _ "\"";
		}
		let extra_names := inject.keys().sort( function ( left, right ) {
			return left cmp right;
		});
		for ( let extra in extra_names ) {
			if ( not present.exists(extra) ) {
				markup _= " " _ extra _ "=\"" _ _rdfa_escape_attr(inject.get(extra)) _ "\"";
			}
		}
		markup _= ">";
		for ( let child in node.childNodes() ) {
			markup _= self._serialize_node( child, {} );
		}
		return markup _ "</" _ tag _ ">";
	}

	// Host hook for HTML @datetime / <time>; returns
	// { value, datatype } or null. Generic XML has no override.
	method value_override ( node ) {
		return null;
	}

	// Whether the node is a head/body element of the host language;
	// never the case for generic XML.
	method is_head_or_body ( node ) {
		return false;
	}

	// Term mappings supplied by the host.
	method terms () {
		// Assign through a local: zuzu-rust returns Null for a bare
		// `return MODULE_GLOBAL;` inside a method.
		let initial_terms := RDFA_INITIAL_TERMS;
		return initial_terms;
	}

	// Extra terms recognised only in @rel/@rev; none for generic XML.
	method rel_rev_terms () {
		return {};
	}

	// Default vocabulary supplied by the host; none for generic XML.
	method default_vocab () {
		return null;
	}

	// Whether rdfa:copy / rdfa:Pattern property copying is applied
	// after processing.
	method property_copying () {
		return false;
	}

	// HTML+RDFa: with @property present, @rel/@rev values that are not
	// CURIEs or absolute IRIs are ignored entirely.
	method rel_rev_needs_curie_with_property () {
		return false;
	}
}

class RdfaProcessor {
	let host with get;
	let Array quads with get := [];
	let Number bnode_counter := 0;
	let Dict bnode_labels := {};
	let Dict vocabularies_used := {};
	let Boolean vocab_expansion := false;
	let vocab_loader := null;
	// The document's own URL: CURIEs whose prefix maps to a relative
	// IRI resolve against this, not against any in-document <base>.
	let String doc_origin := "";

	method run ( root, String base ) {
		// Entry point: process the tree under `root` and return the
		// de-duplicated quads. The fragment-less base becomes the
		// document origin; the host may substitute a document-level
		// base for everything else.
		let origin := _rdfa_strip_fragment(base);
		doc_origin := origin;
		let doc_base := host.doc_base( root, origin );
		let root_expander := new CurieExpander(
			term_mappings: host.terms(),
			rel_rev_terms: host.rel_rev_terms(),
			default_vocab: host.default_vocab(),
			base: doc_base,
		);
		let root_ctx := new RdfaContext(
			parent_subject: rdf_iri(doc_base),
			expander: root_expander,
		);
		self.process_element( root, root_ctx );
		if ( host.property_copying() ) {
			self.apply_patterns();
		}
		if ( vocab_expansion ) {
			self.expand_vocabularies();
		}
		return rdf_quads_unique(quads);
	}

	method new_bnode () {
		// Mint a fresh blank node labelled "rdfa<N>", where N comes from
		// the processor-wide counter.
		bnode_counter++;
		let label := "rdfa" _ bnode_counter;
		return rdf_blank(label);
	}

	method named_bnode ( String label ) {
		// Blank node for a document-supplied label; the same label always
		// yields the same node. The empty label ("_:") gets a generated
		// "rdfa<N>" name, any other label becomes "b_<label>".
		let key := "_:" _ label;
		if ( bnode_labels.exists(key) ) {
			return bnode_labels.get(key);
		}
		bnode_counter++;
		let generated := "b_" _ label;
		if ( label eq "" ) {
			generated := "rdfa" _ bnode_counter;
		}
		bnode_labels.set( key, rdf_blank(generated) );
		return bnode_labels.get(key);
	}

	method emit ( s, p, o ) {
		// Record one triple (as a quad) in the output list.
		let quad := rdf_quad( s, p, o );
		quads.push(quad);
	}

	// Convert an expanded string (IRI or "_:label") to a subject/object term.
	method _resource_term ( expanded ) {
		// null stays null; "_:label" maps to the shared blank node for
		// that label; anything else becomes an IRI term (resolved against
		// the document origin when it is still relative).
		if ( expanded == null ) {
			return null;
		}
		if ( starts_with( expanded, "_:" ) ) {
			let label := substr( expanded, 2 );
			return self.named_bnode(label);
		}
		return rdf_iri(self._against_origin(expanded));
	}

	// Predicates must be IRIs; blank nodes are dropped.
	method _predicate_term ( expanded ) {
		// Nothing to build from null, and blank nodes are not allowed in
		// predicate position.
		if ( expanded == null or starts_with( expanded, "_:" ) ) {
			return null;
		}
		let absolute := self._against_origin(expanded);
		return rdf_iri(absolute);
	}

	// A relative result can only come from a CURIE whose prefix was
	// declared with a relative IRI; it resolves against the document
	// origin (suite test 0319).
	method _against_origin ( String iri ) {
		// Leave the IRI alone when there is no origin to resolve against
		// or when it already starts with a scheme.
		if ( doc_origin eq "" ) {
			return iri;
		}
		if ( _rdfa_looks_absolute(iri) ) {
			return iri;
		}
		return _rdfa_resolve_against( doc_origin, iri );
	}

	method _resolve_resource ( String value, CurieExpander expander ) {
		// @about/@resource value -> subject/object term (or null).
		let expanded := expander.expand_curie_or_iri(value);
		return self._resource_term(expanded);
	}

	method _resolve_predicates ( String value, CurieExpander expander, Boolean rel_rev ) {
		// Expand every whitespace-separated token of the attribute value
		// to a predicate term, silently dropping tokens that do not
		// yield one.
		let terms := [];
		for ( let token in _rdfa_tokens(value) ) {
			let expanded := expander.expand_term_or_curie_or_absiri( token, rel_rev );
			let term := self._predicate_term(expanded);
			next if term == null;
			terms.push(term);
		}
		return terms;
	}

	method _resolve_types ( String value, CurieExpander expander ) {
		// Expand every token of a @typeof value to a resource term;
		// tokens that do not expand are dropped. Host rel/rev terms are
		// never consulted here.
		let terms := [];
		for ( let token in _rdfa_tokens(value) ) {
			let term := self._resource_term(
				expander.expand_term_or_curie_or_absiri( token, false ),
			);
			next if term == null;
			terms.push(term);
		}
		return terms;
	}

	method process_element ( node, RdfaContext ctx ) {
		// Process one element under the evaluation context `ctx`:
		// update base, vocabulary, prefixes and language; establish
		// new_subject / current_object_resource / typed_resource; emit
		// type, rel/rev, property and role triples; complete the
		// parent's incomplete triples; recurse into child elements; and
		// finally emit list triples for mappings created on this
		// element. The "Step N" comments below follow the numbering of
		// the RDFa Core 1.1 processing sequence.
		let expander := ctx.get_expander();
		let host_obj := host;

		// xml:base (generic XML only; no-op for the HTML hosts)
		let base := host_obj.element_base( node, expander.get_base() );
		expander := expander.with_base(base) if base ne expander.get_base();

		// Step 2: @vocab
		let vocab_attr := host_obj.get_attr( node, "vocab" );
		if ( not (vocab_attr == null) ) {
			if ( trim(vocab_attr) eq "" ) {
				// An empty @vocab resets to the host's default vocabulary.
				expander := expander.with_vocab( host_obj.default_vocab() );
			}
			else {
				let vocab_iri := expander.resolve_iri(trim(vocab_attr));
				expander := expander.with_vocab(vocab_iri);
				vocabularies_used.set( vocab_iri, true );
				self.emit(
					rdf_iri(base),
					rdf_iri(RDFA_NS _ "usesVocabulary"),
					rdf_iri(vocab_iri),
				);
			}
		}

		// Step 3: prefix mappings — xmlns:* first, then @prefix wins.
		let new_prefixes := host_obj.scan_prefixes(node);
		let prefix_attr := host_obj.get_attr( node, "prefix" );
		if ( not (prefix_attr == null) ) {
			// @prefix is a flat list of "name: iri" pairs; tokens are
			// consumed two at a time and a name without the trailing
			// colon (or an empty name) is skipped together with its IRI.
			let tokens := _rdfa_tokens(prefix_attr);
			let i := 0;
			while ( i + 1 < tokens.length() ) {
				let name := tokens[i];
				if ( ends_with( name, ":" ) ) {
					let prefix := substr( name, 0, length name - 1 );
					new_prefixes.set( lc(prefix), tokens[i + 1] ) if prefix ne "";
				}
				i := i + 2;
			}
		}
		expander := expander.with_prefixes(new_prefixes);

		// Track document-declared namespaces (xmlns, xmlns:*, @prefix)
		// for XMLLiteral serialization.
		let ns_decls := ctx.get_ns_decls();
		let xmlns_default := host_obj.default_xmlns(node);
		if ( new_prefixes.keys().length() > 0 or not (xmlns_default == null) ) {
			// Copy before changing so the parent's dict stays untouched.
			ns_decls := ns_decls.copy();
			for ( let p in new_prefixes.keys() ) {
				ns_decls.set( "xmlns:" _ p, new_prefixes.get(p) );
			}
			ns_decls.set( "xmlns", xmlns_default ) if not (xmlns_default == null);
		}

		// Step 4: language
		let lang := ctx.get_lang();
		let lang_attr := host_obj.get_lang(node);
		lang := lang_attr if not (lang_attr == null);

		// Gather the RDFa attributes.
		let about_attr    := host_obj.get_attr( node, "about" );
		let resource_attr := host_obj.get_attr( node, "resource" );
		let href_attr     := host_obj.get_attr( node, "href" );
		let src_attr      := host_obj.get_attr( node, "src" );
		let typeof_attr   := host_obj.get_attr( node, "typeof" );
		let property_attr := host_obj.get_attr( node, "property" );
		let content_attr  := host_obj.get_attr( node, "content" );
		let datatype_attr := host_obj.get_attr( node, "datatype" );
		let rel_attr      := host_obj.get_attr( node, "rel" );
		let rev_attr      := host_obj.get_attr( node, "rev" );
		let inlist_attr   := host_obj.get_attr( node, "inlist" );

		// HTML+RDFa: with @property, @rel/@rev values that are not
		// CURIEs/IRIs are dropped; if nothing remains the attribute is
		// treated as absent.
		if ( not (property_attr == null) and host_obj.rel_rev_needs_curie_with_property() ) {
			rel_attr := self._strip_term_tokens(rel_attr);
			rev_attr := self._strip_term_tokens(rev_attr);
		}

		let has_rel := not (rel_attr == null);
		let has_rev := not (rev_attr == null);

		let rel_preds := has_rel ?
			self._resolve_predicates( rel_attr, expander, true ) : [];
		let rev_preds := has_rev ?
			self._resolve_predicates( rev_attr, expander, true ) : [];

		// The root is the element with no parent, or whose parent is the
		// document node.
		let is_root := node.parentNode() == null or
			node.parentNode().nodeKind() eq "document";

		let new_subject := null;
		let current_object_resource := null;
		let typed_resource := null;
		let skip := false;

		// Resource precedence: @resource, then @href, then @src. A
		// present but unresolvable @resource falls through to the next.
		let about_term := not (about_attr == null) ?
			self._resolve_resource( about_attr, expander ) : null;
		let resource_term := not (resource_attr == null) ?
			self._resolve_resource( resource_attr, expander ) : null;
		resource_term := self._resource_term(expander.resolve_iri(href_attr))
			if resource_term == null and not (href_attr == null);
		resource_term := self._resource_term(expander.resolve_iri(src_attr))
			if resource_term == null and not (src_attr == null);

		if ( not has_rel and not has_rev ) {
			if ( not (property_attr == null) and content_attr == null and datatype_attr == null ) {
				// Step 5.1
				if ( not (about_term == null) ) {
					new_subject := about_term;
				}
				else if ( host_obj.is_head_or_body(node) and not (ctx.get_parent_object() == null) ) {
					new_subject := ctx.get_parent_object();
				}
				else if ( is_root ) {
					new_subject := rdf_iri(expander.resolve_iri(""));
				}
				else if ( not (ctx.get_parent_object() == null) ) {
					new_subject := ctx.get_parent_object();
				}
				if ( not (typeof_attr == null) ) {
					if ( not (about_term == null) ) {
						typed_resource := about_term;
					}
					else if ( is_root ) {
						typed_resource := new_subject;
					}
					else {
						typed_resource := not (resource_term == null) ?
							resource_term : self.new_bnode();
						current_object_resource := typed_resource;
					}
				}
			}
			else {
				// Step 5.2
				if ( not (about_term == null) ) {
					new_subject := about_term;
				}
				else if ( not (resource_term == null) ) {
					new_subject := resource_term;
				}
				else if ( host_obj.is_head_or_body(node) and not (ctx.get_parent_object() == null) ) {
					new_subject := ctx.get_parent_object();
				}
				else if ( is_root ) {
					new_subject := rdf_iri(expander.resolve_iri(""));
				}
				else if ( not (typeof_attr == null) ) {
					new_subject := self.new_bnode();
				}
				else if ( not (ctx.get_parent_object() == null) ) {
					new_subject := ctx.get_parent_object();
					// Nothing on this element establishes a subject: it is
					// transparent unless it carries @property.
					skip := true if property_attr == null;
				}
				typed_resource := new_subject if not (typeof_attr == null);
			}
		}
		else {
			// Step 6: @rel/@rev present
			if ( not (about_term == null) ) {
				new_subject := about_term;
				typed_resource := new_subject if not (typeof_attr == null);
			}
			else if ( host_obj.is_head_or_body(node) and not (ctx.get_parent_object() == null) ) {
				new_subject := ctx.get_parent_object();
				typed_resource := new_subject if not (typeof_attr == null);
			}
			else if ( is_root ) {
				new_subject := rdf_iri(expander.resolve_iri(""));
				typed_resource := new_subject if not (typeof_attr == null);
			}
			else if ( not (ctx.get_parent_object() == null) ) {
				new_subject := ctx.get_parent_object();
			}
			current_object_resource := resource_term;
			// @typeof without @about (and not on root/head/body) types the
			// object, minting a blank node when no resource was given.
			if ( not (typeof_attr == null) and about_term == null and
				not ( is_root or host_obj.is_head_or_body(node) ) ) {
				current_object_resource := self.new_bnode()
					if current_object_resource == null;
				typed_resource := current_object_resource;
			}
		}

		// Step 7: @typeof
		if ( not (typed_resource == null) and not (typeof_attr == null) ) {
			for ( let type_term in self._resolve_types( typeof_attr, expander ) ) {
				self.emit( typed_resource, rdf_iri(RDF_NS _ "type"), type_term );
			}
		}

		// Step 8: fresh list mappings when the subject that list items
		// attach to differs from the one owning the inherited mapping.
		// (Core §7.5 step 8 compares against the parent object, but the
		// official suite — 0225/0226/0227 — requires ownership tracking.)
		let local_list_mappings := ctx.get_list_mappings();
		let created_list_mappings := false;
		let list_owner := ctx.get_list_subject();
		if ( not (new_subject == null) and ( list_owner == null or
			rdf_term_key(new_subject) ne rdf_term_key(list_owner) ) ) {
			local_list_mappings := {};
			created_list_mappings := true;
			list_owner := new_subject;
		}

		// Steps 9/10: @rel/@rev triples or incomplete triples
		let local_incomplete := [];
		if ( has_rel or has_rev ) {
			if ( not (current_object_resource == null) ) {
				for ( let pred in rel_preds ) {
					if ( not (inlist_attr == null) ) {
						self._list_append( local_list_mappings, pred, current_object_resource );
					}
					else {
						self.emit( new_subject, pred, current_object_resource );
					}
				}
				for ( let pred in rev_preds ) {
					self.emit( current_object_resource, pred, new_subject );
				}
			}
			else {
				// No object yet: a blank node stands in, and the triples
				// stay pending until a descendant supplies a subject.
				current_object_resource := self.new_bnode();
				for ( let pred in rel_preds ) {
					if ( not (inlist_attr == null) ) {
						let items := self._list_for( local_list_mappings, pred );
						local_incomplete.push(new RdfaIncompleteTriple(
							predicate: pred,
							direction: "list",
							list: items,
						));
					}
					else {
						local_incomplete.push(new RdfaIncompleteTriple(
							predicate: pred,
							direction: "forward",
						));
					}
				}
				for ( let pred in rev_preds ) {
					local_incomplete.push(new RdfaIncompleteTriple(
						predicate: pred,
						direction: "reverse",
					));
				}
			}
		}

		// Step 11: @property
		if ( not (property_attr == null) ) {
			let datatype_term := null;
			let datatype_resolved := false;
			// An empty, unresolvable or blank-node @datatype leaves
			// datatype_resolved false.
			if ( not (datatype_attr == null) and trim(datatype_attr) ne "" ) {
				let dt := expander.expand_term_or_curie_or_absiri( trim(datatype_attr), false );
				if ( not (dt == null) and not starts_with( dt, "_:" ) ) {
					datatype_term := rdf_iri(dt);
					datatype_resolved := true;
				}
			}

			// Object selection, in order: XMLLiteral serialization,
			// @content, host value override, typed text, resource
			// object, typed resource, and finally plain text.
			let value_obj := null;
			if ( datatype_resolved and
				rdf_term_key(datatype_term) eq rdf_term_key(rdf_iri(RDF_NS _ "XMLLiteral")) ) {
				value_obj := rdf_literal(
					host_obj.serialize_children( node, ns_decls ),
					"",
					datatype_term,
				);
			}
			else if ( not (content_attr == null) ) {
				value_obj := datatype_resolved ?
					rdf_literal( content_attr, "", datatype_term ) :
					rdf_literal( content_attr, lang );
			}
			else {
				let override := host_obj.value_override(node);
				if ( not (override == null) ) {
					if ( datatype_resolved ) {
						value_obj := rdf_literal( override{value}, "", datatype_term );
					}
					else if ( not (override{datatype} == null) ) {
						value_obj := rdf_literal( override{value}, "", rdf_iri(override{datatype}) );
					}
					else {
						value_obj := rdf_literal( override{value}, lang );
					}
				}
				else if ( datatype_resolved ) {
					value_obj := rdf_literal( host_obj.text_content(node), "", datatype_term );
				}
				else if ( not has_rel and not has_rev and content_attr == null and
					datatype_attr == null and not (resource_term == null) ) {
					value_obj := resource_term;
				}
				// Typed resource becomes the object only when @about is
				// absent entirely; a present-but-unresolvable @about (e.g.
				// "[]") still blocks this path (suite test 0297).
				else if ( not (typeof_attr == null) and about_attr == null and
					datatype_attr == null and not (typed_resource == null) ) {
					value_obj := typed_resource;
				}
				else {
					value_obj := rdf_literal( host_obj.text_content(node), lang );
				}
			}

			// NOTE(review): new_subject is still null here when no branch
			// of steps 5/6 assigned it (a non-root element whose context
			// has no parent object) — confirm that cannot occur in practice.
			for ( let pred in self._resolve_predicates( property_attr, expander, false ) ) {
				if ( not (inlist_attr == null) ) {
					self._list_append( local_list_mappings, pred, value_obj );
				}
				else {
					self.emit( new_subject, pred, value_obj );
				}
			}
		}

		// Step 12: complete the parent's incomplete triples
		if ( not skip and not (new_subject == null) ) {
			for ( let it in ctx.get_incomplete() ) {
				if ( it.get_direction() eq "forward" ) {
					self.emit( ctx.get_parent_subject(), it.get_predicate(), new_subject );
				}
				else if ( it.get_direction() eq "reverse" ) {
					self.emit( new_subject, it.get_predicate(), ctx.get_parent_subject() );
				}
				else {
					// "list": append to the array shared with the ancestor's
					// list mapping.
					it.get_list().push(new_subject);
				}
			}
		}

		// Role Attribute 1.0
		let role_attr := host_obj.get_attr( node, "role" );
		if ( not (role_attr == null) ) {
			// The subject is the element's own #id when it has one,
			// otherwise a fresh blank node; bare role terms expand in the
			// XHTML vocabulary.
			let id_attr := host_obj.get_attr( node, "id" );
			let role_subject := not (id_attr == null) ?
				rdf_iri(expander.resolve_iri( "#" _ id_attr )) :
				self.new_bnode();
			let role_expander := expander.with_vocab(XHV_NS);
			for ( let token in _rdfa_tokens(role_attr) ) {
				let expanded := role_expander.expand_term_or_curie_or_absiri( token, false );
				let term := self._predicate_term(expanded);
				next if term == null;
				self.emit( role_subject, rdf_iri(XHV_NS _ "role"), term );
			}
		}

		// Step 13: recurse
		let child_ctx := null;
		if ( skip ) {
			// Transparent element: children inherit the parent's subject,
			// object and incomplete triples; only the lexical scope
			// (expander, language, namespaces) and list state are updated.
			child_ctx := new RdfaContext(
				parent_subject: ctx.get_parent_subject(),
				parent_object: ctx.get_parent_object(),
				incomplete: ctx.get_incomplete(),
				list_mappings: local_list_mappings,
				list_subject: list_owner,
				ns_decls: ns_decls,
				expander: expander,
				lang: lang,
			);
		}
		else {
			let child_parent_subject := not (new_subject == null) ?
				new_subject : ctx.get_parent_subject();
			let child_parent_object := not (current_object_resource == null) ?
				current_object_resource :
				( not (new_subject == null) ? new_subject : ctx.get_parent_subject() );
			child_ctx := new RdfaContext(
				parent_subject: child_parent_subject,
				parent_object: child_parent_object,
				incomplete: local_incomplete,
				list_mappings: local_list_mappings,
				list_subject: list_owner,
				ns_decls: ns_decls,
				expander: expander,
				lang: lang,
			);
		}
		for ( let child in node.childNodes() ) {
			next unless child.nodeKind() eq "element";
			self.process_element( child, child_ctx );
		}

		// Step 14: emit list triples for mappings created here
		if ( created_list_mappings ) {
			self._emit_lists( new_subject, local_list_mappings );
		}
	}

	// Filters a whitespace-separated attribute value down to the tokens
	// that contain a colon, dropping every other token. Returns the
	// surviving tokens joined by single spaces, or null when `value` is
	// null or no token survives.
	method _strip_term_tokens ( value ) {
		if ( value == null ) {
			return null;
		}
		let with_colon := [];
		for ( let candidate in _rdfa_tokens(value) ) {
			if ( contains( candidate, ":" ) ) {
				with_colon.push(candidate);
			}
		}
		if ( with_colon.length() == 0 ) {
			return null;
		}
		return join( " ", with_colon );
	}

	// Returns the item array stored in `mappings` for predicate `pred`,
	// first creating an empty entry (keyed by rdf_term_key(pred)) when
	// none exists yet. The returned array is the stored one, so pushes
	// on it are visible through `mappings`.
	method _list_for ( Dict mappings, pred ) {
		let slot := rdf_term_key(pred);
		mappings.set( slot, { predicate: pred, items: [] } )
			unless mappings.exists(slot);
		let entry := mappings.get(slot);
		return entry{items};
	}

	// Appends `item` to the list kept in `mappings` for predicate `pred`,
	// creating that list on first use (see _list_for).
	method _list_append ( Dict mappings, pred, item ) {
		let target := self._list_for( mappings, pred );
		target.push(item);
	}

	// Emits every list mapping in `mappings` as an RDF collection hanging
	// off `subject`.
	//
	// A mapping with no items produces one triple whose object is
	// rdf:nil. Otherwise one blank node per item is allocated up front
	// (before any triple is emitted), each cell receives its rdf:first
	// and rdf:rest triples in item order, and finally `subject` is linked
	// to the first cell through the mapping's predicate.
	method _emit_lists ( subject, Dict mappings ) {
		for ( let key in mappings.keys() ) {
			let mapping := mappings.get(key);
			let members := mapping{items};
			let count := members.length();
			if ( count == 0 ) {
				self.emit( subject, mapping{predicate}, rdf_iri(RDF_NS _ "nil") );
			}
			else {
				// Allocate all cells first so blank nodes are created
				// in list order, ahead of the triples that use them.
				let chain := [];
				for ( let member in members ) {
					chain.push(self.new_bnode());
				}
				let pos := 0;
				while ( pos < count ) {
					let link := chain[pos];
					self.emit( link, rdf_iri(RDF_NS _ "first"), members[pos] );
					// The last cell terminates the list with rdf:nil.
					let tail := null;
					if ( pos + 1 == count ) {
						tail := rdf_iri(RDF_NS _ "nil");
					}
					else {
						tail := chain[pos + 1];
					}
					self.emit( link, rdf_iri(RDF_NS _ "rest"), tail );
					pos++;
				}
				self.emit( subject, mapping{predicate}, chain[0] );
			}
		}
	}

	// HTML+RDFa property copying: rdfa:copy / rdfa:Pattern.
	//
	// Rewrites `quads` in four passes:
	//   1. collect every subject typed rdfa:Pattern;
	//   2. group each pattern's other triples by pattern subject;
	//   3. for every rdfa:copy triple on a non-pattern subject, copy the
	//      target pattern's properties onto that subject (following
	//      nested rdfa:copy links) and remember which patterns were used;
	//   4. rebuild `quads` without the triples of referenced patterns and
	//      without the rdfa:copy triples that pointed at a pattern, then
	//      append the copied triples.
	// Returns null without touching `quads` when no pattern exists.
	method apply_patterns () {
		let copy_pred := rdf_term_key(rdf_iri(RDFA_NS _ "copy"));
		let type_pred := rdf_term_key(rdf_iri(RDF_NS _ "type"));
		let pattern_type := rdf_term_key(rdf_iri(RDFA_NS _ "Pattern"));

		// Identify pattern subjects.
		let patterns := {};
		for ( let q in quads ) {
			if ( rdf_term_key(q.get_predicate()) eq type_pred and
				rdf_term_key(q.get_object()) eq pattern_type ) {
				patterns.set( rdf_term_key(q.get_subject()), true );
			}
		}
		return null if patterns.keys().length() == 0;

		// Group pattern property triples by pattern subject. The
		// rdf:type rdfa:Pattern marker itself is never copied.
		let pattern_props := {};
		for ( let q in quads ) {
			let skey := rdf_term_key(q.get_subject());
			next unless patterns.exists(skey);
			next if rdf_term_key(q.get_predicate()) eq type_pred and
				rdf_term_key(q.get_object()) eq pattern_type;
			pattern_props.set( skey, [] ) unless pattern_props.exists(skey);
			pattern_props.get(skey).push(q);
		}

		// Copy properties (transitively for nested rdfa:copy) onto each
		// referencing subject, tracking which patterns get referenced.
		// A fresh `seen` dict per rdfa:copy triple guards against cycles
		// between patterns within one expansion.
		let referenced := {};
		let copied := [];
		for ( let q in quads ) {
			next if patterns.exists(rdf_term_key(q.get_subject()));
			next unless rdf_term_key(q.get_predicate()) eq copy_pred;
			self._copy_pattern_into(
				copied, q.get_subject(), rdf_term_key(q.get_object()),
				pattern_props, copy_pred, {}, referenced,
			);
		}

		// Drop the consumed rdfa:copy triples and the triples of
		// referenced patterns; unreferenced patterns survive intact.
		let out := [];
		for ( let q in quads ) {
			let skey := rdf_term_key(q.get_subject());
			next if referenced.exists(skey);
			// An rdfa:copy triple is only consumed when its target is
			// an rdfa:Pattern; one aimed at anything else is ordinary
			// data and must be kept.
			next if rdf_term_key(q.get_predicate()) eq copy_pred and
				patterns.exists(rdf_term_key(q.get_object())) and
				not patterns.exists(skey);
			out.push(q);
		}
		for ( let q in copied ) {
			out.push(q);
		}
		quads := out;
	}

	// Appends to `out` a copy of every property of the pattern identified
	// by `pattern_key`, re-attached to `subject`.
	//
	// A pattern property whose predicate is rdfa:copy is not copied
	// itself; the pattern it points at is expanded onto the same
	// `subject` instead. `seen` records the pattern keys already visited
	// in this expansion so a key is processed at most once; `referenced`
	// collects the keys of patterns that actually had properties to copy.
	// Returns null when the key was already visited or has no entry in
	// `pattern_props`.
	method _copy_pattern_into ( Array out, subject, String pattern_key,
		Dict pattern_props, String copy_pred, Dict seen, Dict referenced ) {
		if ( seen.exists(pattern_key) ) {
			return null;
		}
		seen.set( pattern_key, true );
		if ( not pattern_props.exists(pattern_key) ) {
			return null;
		}
		referenced.set( pattern_key, true );
		for ( let prop in pattern_props.get(pattern_key) ) {
			if ( rdf_term_key(prop.get_predicate()) eq copy_pred ) {
				// Nested rdfa:copy: expand the nested pattern in place.
				self._copy_pattern_into(
					out, subject, rdf_term_key(prop.get_object()),
					pattern_props, copy_pred, seen, referenced,
				);
			}
			else {
				out.push(rdf_quad( subject, prop.get_predicate(), prop.get_object() ));
			}
		}
	}

	// RDFa vocabulary expansion (RDFa Core 1.1 section 10).
	//
	// Returns null immediately when no vocabulary was recorded in
	// vocabularies_used. Otherwise every recorded vocabulary IRI is
	// loaded (through vocab_loader, or _default_vocab_loader when none
	// was supplied; a loader returning null is skipped) and its
	// rdfs:subPropertyOf, rdfs:subClassOf, owl:equivalentProperty and
	// owl:equivalentClass statements become inference rules. The rules
	// are then applied to `quads` until no new statement appears; new
	// statements are appended to `quads` in the order they are found.
	method expand_vocabularies () {
		return null if vocabularies_used.keys().length() == 0;
		let loader := vocab_loader;
		loader := fn ( String iri ) -> self._default_vocab_loader(iri)
			if loader == null;

		// Keys of the rule predicates, computed once instead of once per
		// vocabulary statement.
		let k_sub_property := rdf_term_key(rdf_iri("http://www.w3.org/2000/01/rdf-schema#subPropertyOf"));
		let k_sub_class := rdf_term_key(rdf_iri("http://www.w3.org/2000/01/rdf-schema#subClassOf"));
		let k_equiv_property := rdf_term_key(rdf_iri("http://www.w3.org/2002/07/owl#equivalentProperty"));
		let k_equiv_class := rdf_term_key(rdf_iri("http://www.w3.org/2002/07/owl#equivalentClass"));

		let sub_prop := {};
		let sub_class := {};
		for ( let vocab_iri in vocabularies_used.keys() ) {
			let vocab_quads := loader(vocab_iri);
			next if vocab_quads == null;
			for ( let q in vocab_quads ) {
				let p := rdf_term_key(q.get_predicate());
				let s := q.get_subject();
				let o := q.get_object();
				if ( p eq k_sub_property ) {
					self._rule_add( sub_prop, s, o );
				}
				else if ( p eq k_sub_class ) {
					self._rule_add( sub_class, s, o );
				}
				else if ( p eq k_equiv_property ) {
					// Equivalence is entailment in both directions.
					self._rule_add( sub_prop, s, o );
					self._rule_add( sub_prop, o, s );
				}
				else if ( p eq k_equiv_class ) {
					self._rule_add( sub_class, s, o );
					self._rule_add( sub_class, o, s );
				}
			}
		}

		// Index of the statements already present, keyed by
		// "subject predicate object". It is kept up to date as statements
		// are added, so it only has to be built once.
		let type_key := rdf_term_key(rdf_iri(RDF_NS _ "type"));
		let seen := {};
		for ( let q in quads ) {
			seen.set( rdf_term_key(q.get_subject()) _ " " _
				rdf_term_key(q.get_predicate()) _ " " _
				rdf_term_key(q.get_object()), true );
		}

		// Materialise to a fixpoint. Only statements added by the
		// previous pass can produce anything new, so each pass scans just
		// that frontier (the first pass scans everything).
		let frontier := quads;
		while ( frontier.length() > 0 ) {
			let additions := [];
			for ( let q in frontier ) {
				let pkey := rdf_term_key(q.get_predicate());
				if ( sub_prop.exists(pkey) ) {
					for ( let super_term in sub_prop.get(pkey){supers} ) {
						let candidate := rdf_quad( q.get_subject(), super_term, q.get_object() );
						let ckey := rdf_term_key(q.get_subject()) _ " " _
							rdf_term_key(super_term) _ " " _ rdf_term_key(q.get_object());
						if ( not seen.exists(ckey) ) {
							seen.set( ckey, true );
							additions.push(candidate);
						}
					}
				}
				if ( pkey eq type_key ) {
					let okey := rdf_term_key(q.get_object());
					if ( sub_class.exists(okey) ) {
						for ( let super_term in sub_class.get(okey){supers} ) {
							let ckey := rdf_term_key(q.get_subject()) _ " " _
								type_key _ " " _ rdf_term_key(super_term);
							if ( not seen.exists(ckey) ) {
								seen.set( ckey, true );
								additions.push(rdf_quad(
									q.get_subject(), rdf_iri(RDF_NS _ "type"), super_term,
								));
							}
						}
					}
				}
			}
			// Append after the scan so the array being iterated is never
			// grown mid-loop.
			for ( let a in additions ) {
				quads.push(a);
			}
			frontier := additions;
		}
	}

	// Records in `table` that `sub` entails `super_term`. Entries are
	// keyed by rdf_term_key(sub); each holds a `supers` array to which
	// `super_term` is appended (duplicates are not filtered here).
	method _rule_add ( Dict table, sub, super_term ) {
		let slot := rdf_term_key(sub);
		if ( not table.exists(slot) ) {
			table.set( slot, { supers: [] } );
		}
		let rule := table.get(slot);
		let supers := rule{supers};
		supers.push(super_term);
	}

	// Fallback vocabulary loader used when no vocab_loader was supplied.
	// Requests `iri` with an Accept header preferring Turtle over RDF/XML,
	// then parses the response text with `iri` as the base. A
	// Content-Type header containing "rdf+xml" selects RdfXmlParser; any
	// other value (including a missing header) selects TurtleParser.
	// NOTE(review): expect_success() presumably raises on a non-success
	// response; nothing here catches that - confirm against std/net/http.
	method _default_vocab_loader ( String iri ) {
		from std/net/http import UserAgent;
		from rdf/parser/turtle import TurtleParser;
		from rdf/parser/rdfxml import RdfXmlParser;
		let accept := { Accept: "text/turtle, application/rdf+xml;q=0.9" };
		let agent := new UserAgent( default_headers: accept );
		let response := agent.get(iri).expect_success();
		// Concatenating with "" stringifies the header value before
		// lower-casing it.
		let media_type := lc("" _ response.header("content-type"));
		let reader := null;
		if ( contains( media_type, "rdf+xml" ) ) {
			reader := new RdfXmlParser();
		}
		else {
			reader := new TurtleParser();
		}
		return reader.parse_string( response.text(), base: iri );
	}
}

// Options shared by the RDFa parser classes.
class RdfaParserOptions {
	// Base IRI handed to the processor; defaults to the empty string.
	let String base with get := "";
	// Optional destination; when set, result() adds the quads to it.
	let into with get := null;
	// Whether vocabulary expansion was requested.
	let Boolean vocab_expansion with get := false;
	// Optional caller-supplied vocabulary loader.
	let vocab_loader with get := null;

	// Builds an options object from the named arguments of a parse call.
	// Recognised keys are "base" (stringified), "into",
	// "vocab_expansion" (reduced to true/false) and "vocab_loader"; any
	// other key dies with an "unsupported option" message. When a key
	// appears more than once the last occurrence wins.
	static method from_pairs ( PairList options ) {
		let base_text := "";
		let target := null;
		let expand := false;
		let loader := null;
		for ( let pair in options.to_Array() ) {
			let name := pair.key;
			if ( name eq "base" ) {
				base_text := "" _ pair.value;
				next;
			}
			if ( name eq "into" ) {
				target := pair.value;
				next;
			}
			if ( name eq "vocab_expansion" ) {
				if ( pair.value ) {
					expand := true;
				}
				else {
					expand := false;
				}
				next;
			}
			if ( name eq "vocab_loader" ) {
				loader := pair.value;
				next;
			}
			die "rdfa parser: unsupported option '" _ name _ "'";
		}
		return new RdfaParserOptions(
			base: base_text,
			into: target,
			vocab_expansion: expand,
			vocab_loader: loader,
		);
	}

	// Delivers parsed quads to the caller: without an `into` target the
	// array is returned as-is; with one, the quads are added to it via
	// add_quads() and the target itself is returned.
	method result ( Array quads ) {
		if ( into == null ) {
			return quads;
		}
		into.add_quads(quads);
		return into;
	}
}

// RDFa parser for generic XML documents. The document is parsed with
// std/data/xml and its root element handed to an RdfaProcessor.
class RdfaCoreParser with RdfParser {
	// Supplies the host-language object given to the processor; this
	// class uses a plain RdfaHost.
	method host () {
		let generic_host := new RdfaHost();
		return generic_host;
	}

	// Parses `text` as XML and extracts its RDFa. Named options are
	// validated by RdfaParserOptions.from_pairs (base, into,
	// vocab_expansion, vocab_loader). Returns whatever
	// RdfaParserOptions.result() yields: the quad array, or the `into`
	// target when one was given.
	method parse_string ( String text, ... PairList options ) {
		from std/data/xml import XML;
		let settings := RdfaParserOptions.from_pairs(options);
		let tree := XML.parse(text);
		let engine := new RdfaProcessor(
			host: self.host(),
			vocab_expansion: settings.get_vocab_expansion(),
			vocab_loader: settings.get_vocab_loader(),
		);
		let found := engine.run( tree.documentElement(), settings.get_base() );
		return settings.result(found);
	}
}