=encoding utf8
=head1 NAME
rdf/parser/xhtml_rdfa - XHTML+RDFa 1.1 parser.
=head1 SYNOPSIS
from rdf/parser/xhtml_rdfa import XhtmlRdfaParser;
let parser := new XhtmlRdfaParser();
let quads := parser.parse_file(
new Path("page.xhtml"),
base: "http://example.com/page.xhtml",
);
=head1 DESCRIPTION
C<XhtmlRdfaParser> implements W3C XHTML+RDFa 1.1 (Third Edition). The
input is assumed to be well-formed XML and is parsed with
C<std/data/xml>.
Host language behaviour on top of RDFa Core 1.1: the C<base> element
sets the base IRI; C<head> and C<body> inherit the parent object as
subject when no resource attributes are present; the XHTML vocabulary
terms (C<alternate>, C<next>, C<license>, ...) are recognised in
C<@rel> and C<@rev> as well as in the other term-valued attributes;
language comes from C<xml:lang> (preferred) or C<lang>.
Accepts the standard C<base> and C<into> parser options plus
C<vocab_expansion> and C<vocab_loader> (see L<rdf/parser/rdfa_core>).
=head1 EXPORTS
=head2 Classes
=over
=item C<XhtmlRdfaParser>
The parser class; composes the C<RdfParser> trait so C<parse_string>,
C<parse_file>, C<parse_lines>, and C<parse_chunks> are all available.
=item C<XhtmlRdfaHost>
The XHTML host-language configuration, extending C<RdfaHost>.
=back
=head1 COPYRIGHT AND LICENCE
B<< rdf/parser/xhtml_rdfa >> is copyright Toby Inkster.
It is free software; you may redistribute it and/or modify it under
the terms of either the Artistic License 1.0 or the GNU General Public
License version 2.
=cut
from rdf/parser import RdfParser;
from rdf/parser/rdfa_core import
RdfaHost,
RdfaParserOptions,
RdfaProcessor,
XHV_NS,
_rdfa_resolve_against,
_rdfa_strip_fragment;
// XHTML+RDFa 1.1 reserved vocabulary terms; keys lowercase.
// Each entry maps a bare term to the value `XHV_NS _ term`, where XHV_NS
// is imported from rdf/parser/rdfa_core. The table is read by
// XhtmlRdfaHost.rel_rev_terms() (returned as-is) and by
// XhtmlRdfaHost.terms() (merged underneath the inherited terms).
const XHV_TERMS := {
"alternate": XHV_NS _ "alternate",
"appendix": XHV_NS _ "appendix",
"bookmark": XHV_NS _ "bookmark",
"chapter": XHV_NS _ "chapter",
"cite": XHV_NS _ "cite",
"contents": XHV_NS _ "contents",
"copyright": XHV_NS _ "copyright",
"first": XHV_NS _ "first",
"glossary": XHV_NS _ "glossary",
"help": XHV_NS _ "help",
"icon": XHV_NS _ "icon",
"index": XHV_NS _ "index",
"last": XHV_NS _ "last",
"license": XHV_NS _ "license",
"meta": XHV_NS _ "meta",
"next": XHV_NS _ "next",
"p3pv1": XHV_NS _ "p3pv1",
"prev": XHV_NS _ "prev",
"previous": XHV_NS _ "previous",
"role": XHV_NS _ "role",
"section": XHV_NS _ "section",
"start": XHV_NS _ "start",
"stylesheet": XHV_NS _ "stylesheet",
"subsection": XHV_NS _ "subsection",
"top": XHV_NS _ "top",
"up": XHV_NS _ "up",
};
// XHTML host-language configuration: overrides the RdfaHost hooks for
// language lookup, base IRI discovery, head/body detection and the
// vocabulary term tables.
class XhtmlRdfaHost extends RdfaHost {

    // Language of a node. The value reported by the parent host wins
    // (presumably xml:lang, per the module documentation); only when
    // that is null is the plain "lang" attribute consulted.
    method get_lang ( node ) {
        let inherited := super(node);
        if ( inherited == null ) {
            return self.get_attr( node, "lang" );
        }
        return inherited;
    }

    // Per-element base hook. XHTML takes its base from the base element
    // rather than xml:base, so the current base passes through untouched.
    method element_base ( node, String current ) {
        return current;
    }

    // Document base IRI. When a base href is found below root it is
    // resolved against default_base and its fragment is stripped;
    // otherwise default_base is returned unchanged.
    method doc_base ( root, String default_base ) {
        let href := self._find_base_href(root);
        if ( href == null ) {
            return default_base;
        }
        let resolved := _rdfa_resolve_against( default_base, href );
        return _rdfa_strip_fragment(resolved);
    }

    // Depth-first search for the first element with local name "base"
    // that carries a non-null href. Returns that href, or null when the
    // subtree has none (or node is not an element at all).
    method _find_base_href ( node ) {
        if ( not (node.nodeKind() eq "element") ) {
            return null;
        }
        if ( node.localName() eq "base" ) {
            let own_href := self.get_attr( node, "href" );
            if ( not (own_href == null) ) {
                return own_href;
            }
        }
        for ( let kid in node.childNodes() ) {
            if ( kid.nodeKind() eq "element" ) {
                let nested := self._find_base_href(kid);
                if ( not (nested == null) ) {
                    return nested;
                }
            }
        }
        return null;
    }

    // True when the node's local name is "head" or "body".
    method is_head_or_body ( node ) {
        let tag := node.localName();
        return tag in [ "head", "body" ];
    }

    // Term table for @rel/@rev: the XHTML vocabulary terms.
    method rel_rev_terms () {
        // Do not return XHV_TERMS directly: going through a local
        // sidesteps the zuzu-rust bare-return-global bug.
        let table := XHV_TERMS;
        return table;
    }

    // Term table for every term attribute. XHTML+RDFa defines the XHTML
    // vocabulary terms for all of them, not only @rel/@rev (suite test
    // 0260). Inherited terms take precedence; an XHTML term is added
    // only where the key is not already present.
    method terms () {
        let inherited := super();
        let combined := {};
        for ( let term in inherited.keys() ) {
            combined.set( term, inherited.get(term) );
        }
        for ( let term in XHV_TERMS.keys() ) {
            if ( not (combined.exists(term)) ) {
                combined.set( term, XHV_TERMS.get(term) );
            }
        }
        return combined;
    }
}
// XHTML+RDFa parser. Composes the RdfParser trait and supplies
// parse_string, which runs an RdfaProcessor configured with the XHTML
// host over the parsed XML document.
class XhtmlRdfaParser with RdfParser {

    // Parse XHTML+RDFa source text.
    //   text    - the document source, handed to XML.parse.
    //   options - parser options as pairs, decoded through
    //             RdfaParserOptions.from_pairs.
    // Returns whatever the options object's result() yields for the
    // quads produced by the processor.
    method parse_string ( String text, ... PairList options ) {
        from std/data/xml import XML;

        let settings := RdfaParserOptions.from_pairs(options);
        let document := XML.parse(text);

        let xhtml_host := new XhtmlRdfaHost();
        let engine := new RdfaProcessor(
            host: xhtml_host,
            vocab_expansion: settings.get_vocab_expansion(),
            vocab_loader: settings.get_vocab_loader(),
        );

        let root := document.documentElement();
        let produced := engine.run( root, settings.get_base() );
        return settings.result(produced);
    }
}
modules/rdf/parser/xhtml_rdfa.zzm
rdf-rdfa-0.0.1 source code
Package
- Name
- rdf-rdfa
- Version
- 0.0.1
- Uploaded
- 2026-06-13 00:17:04
- Repository
- https://github.com/tobyink/zuzu-rdf-rdfa
- Dependencies
- html/parser >= 0
- rdf >= 0
- std/data/xml >= 0
- std/io >= 0
- std/string >= 0
- Metadata
- zuzu-distribution.json
- Archive
- Download .tar.gz