modules/rdf/parser/xhtml_rdfa.zzm

rdf-rdfa-0.0.1 source code

Package

Name
rdf-rdfa
Version
0.0.1
Uploaded
2026-06-13 00:17:04
Repository
https://github.com/tobyink/zuzu-rdf-rdfa
Dependencies
Metadata
zuzu-distribution.json
Archive
Download .tar.gz
=encoding utf8

=head1 NAME

rdf/parser/xhtml_rdfa - XHTML+RDFa 1.1 parser.

=head1 SYNOPSIS

  from rdf/parser/xhtml_rdfa import XhtmlRdfaParser;

  let parser := new XhtmlRdfaParser();
  let quads := parser.parse_file(
    new Path("page.xhtml"),
    base: "http://example.com/page.xhtml",
  );

=head1 DESCRIPTION

C<XhtmlRdfaParser> implements W3C XHTML+RDFa 1.1 (Third Edition). The
input is assumed to be well-formed XML and is parsed with
C<std/data/xml>.

Host language behaviour on top of RDFa Core 1.1: the C<base> element
sets the base IRI; C<head> and C<body> inherit the parent object as
subject when no resource attributes are present; the XHTML vocabulary
terms (C<alternate>, C<next>, C<license>, ...) are recognised in
C<@rel> and C<@rev>, and are also merged into the general term table
used by the other term attributes (terms inherited from C<RdfaHost>
take precedence on a name clash); language comes from C<xml:lang>
(preferred) or C<lang>.

Accepts the standard C<base> and C<into> parser options plus
C<vocab_expansion> and C<vocab_loader> (see L<rdf/parser/rdfa_core>).

=head1 EXPORTS

=head2 Classes

=over

=item C<XhtmlRdfaParser>

The parser class; composes the C<RdfParser> trait so C<parse_string>,
C<parse_file>, C<parse_lines>, and C<parse_chunks> are all available.

=item C<XhtmlRdfaHost>

The XHTML host-language configuration, extending C<RdfaHost>.

=back

=head1 COPYRIGHT AND LICENCE

B<< rdf/parser/xhtml_rdfa >> is copyright Toby Inkster.

It is free software; you may redistribute it and/or modify it under
the terms of either the Artistic License 1.0 or the GNU General Public
License version 2.

=cut

from rdf/parser import RdfParser;
from rdf/parser/rdfa_core import
	RdfaHost,
	RdfaParserOptions,
	RdfaProcessor,
	XHV_NS,
	_rdfa_resolve_against,
	_rdfa_strip_fragment;

// XHTML+RDFa 1.1 reserved vocabulary terms; keys lowercase.
// Each term name maps to XHV_NS joined with that same name (XHV_NS is
// imported from rdf/parser/rdfa_core). XhtmlRdfaHost hands this table
// back from rel_rev_terms() and folds it into the result of terms().
const XHV_TERMS := {
	"alternate":  XHV_NS _ "alternate",
	"appendix":   XHV_NS _ "appendix",
	"bookmark":   XHV_NS _ "bookmark",
	"chapter":    XHV_NS _ "chapter",
	"cite":       XHV_NS _ "cite",
	"contents":   XHV_NS _ "contents",
	"copyright":  XHV_NS _ "copyright",
	"first":      XHV_NS _ "first",
	"glossary":   XHV_NS _ "glossary",
	"help":       XHV_NS _ "help",
	"icon":       XHV_NS _ "icon",
	"index":      XHV_NS _ "index",
	"last":       XHV_NS _ "last",
	"license":    XHV_NS _ "license",
	"meta":       XHV_NS _ "meta",
	"next":       XHV_NS _ "next",
	"p3pv1":      XHV_NS _ "p3pv1",
	"prev":       XHV_NS _ "prev",
	"previous":   XHV_NS _ "previous",
	"role":       XHV_NS _ "role",
	"section":    XHV_NS _ "section",
	"start":      XHV_NS _ "start",
	"stylesheet": XHV_NS _ "stylesheet",
	"subsection": XHV_NS _ "subsection",
	"top":        XHV_NS _ "top",
	"up":         XHV_NS _ "up",
};

// Host-language hooks for XHTML+RDFa, layered over the generic RdfaHost.
class XhtmlRdfaHost extends RdfaHost {
	// Language of a node: the value reported by the parent host wins when
	// it is non-null; otherwise the plain "lang" attribute is consulted.
	method get_lang ( node ) {
		let inherited := super(node);
		if ( inherited == null ) {
			return self.get_attr( node, "lang" );
		}
		return inherited;
	}

	// XHTML takes its base from the base element rather than xml:base,
	// so the base in effect is handed back unchanged for every element.
	method element_base ( node, String current ) {
		return current;
	}

	// Document base: default_base when no base href is found below root;
	// otherwise that href resolved against default_base, fragment removed.
	method doc_base ( root, String default_base ) {
		let href := self._find_base_href(root);
		if ( href == null ) {
			return default_base;
		}
		let resolved := _rdfa_resolve_against( default_base, href );
		return _rdfa_strip_fragment(resolved);
	}

	// Depth-first search, in document order, for the first element whose
	// local name is "base" and which carries an href attribute. Returns
	// the href value, or null when there is none under node.
	method _find_base_href ( node ) {
		if ( not (node.nodeKind() eq "element") ) {
			return null;
		}
		if ( node.localName() eq "base" ) {
			let href := self.get_attr( node, "href" );
			if ( not (href == null) ) {
				return href;
			}
		}
		for ( let kid in node.childNodes() ) {
			// Only element children can hold (or contain) a base element.
			if ( kid.nodeKind() eq "element" ) {
				let nested := self._find_base_href(kid);
				if ( not (nested == null) ) {
					return nested;
				}
			}
		}
		return null;
	}

	// True when the node's local name is "head" or "body".
	method is_head_or_body ( node ) {
		let tag := node.localName();
		return tag in [ "head", "body" ];
	}

	// Term table used for @rel/@rev: the XHTML vocabulary terms.
	method rel_rev_terms () {
		// Keep the intermediate local: it works around the zuzu-rust
		// bare-return-global bug.
		let table := XHV_TERMS;
		return table;
	}

	// XHTML+RDFa defines the XHTML vocabulary terms for all term
	// attributes, not just @rel/@rev (suite test 0260). The result is a
	// fresh dict: every inherited term first, then each XHTML term whose
	// name is not already taken.
	method terms () {
		let inherited := super();
		let combined := {};
		for ( let term in inherited.keys() ) {
			combined.set( term, inherited.get(term) );
		}
		for ( let term in XHV_TERMS.keys() ) {
			if ( not (combined.exists(term)) ) {
				combined.set( term, XHV_TERMS.get(term) );
			}
		}
		return combined;
	}
}

// XHTML+RDFa parser; composes the RdfParser trait and supplies parse_string.
class XhtmlRdfaParser with RdfParser {
	// Parse XHTML+RDFa source text.
	//
	// text    - document source, handed to XML.parse.
	// options - parser option pairs, decoded by RdfaParserOptions.from_pairs.
	//
	// Returns whatever opts.result() produces for the extracted quads.
	method parse_string ( String text, ... PairList options ) {
		from std/data/xml import XML;
		let opts := RdfaParserOptions.from_pairs(options);
		let doc := XML.parse(text);

		// Collect the processor's configuration in named locals, in the
		// same order the values are needed.
		let xhtml_host := new XhtmlRdfaHost();
		let expansion := opts.get_vocab_expansion();
		let loader := opts.get_vocab_loader();
		let processor := new RdfaProcessor(
			host: xhtml_host,
			vocab_expansion: expansion,
			vocab_loader: loader,
		);

		// Processing starts at the document element with the caller's base.
		let root := doc.documentElement();
		let quads := processor.run( root, opts.get_base() );
		return opts.result(quads);
	}
}