=encoding utf8
=head1 NAME
json/canonicalization - JSON Canonicalization Scheme (RFC 8785).
=head1 SYNOPSIS
from json/canonicalization import jcs_canonicalize;
say( jcs_canonicalize( { "b": 2, "a": 1 } ) ); // {"a":1,"b":2}
say( jcs_canonicalize( [null, true, 1.5] ) ); // [null,true,1.5]
=head1 DESCRIPTION
Implements the JSON Canonicalization Scheme defined by RFC 8785.
The single exported function C<jcs_canonicalize> accepts any
JSON-compatible ZuzuScript value (null, Boolean, Number, String,
Array, or Dict/PairList) and returns its canonical JSON string
representation.
Key properties of the canonical form:
=over
=item * Object keys are sorted by UTF-16 code unit order.
=item * No whitespace between tokens.
=item * Numbers use IEEE 754 / ES2019 C<Number::toString()> serialization.
=item * Strings use C<\"> C<\\> and shorthand C<\b \t \n \f \r> escapes;
remaining control characters (U+0000–U+001F) use lowercase C<\uXXXX>.
=back
=head1 EXPORTS
=head2 C<< jcs_canonicalize(value) -> String >>
Returns the RFC 8785 canonical JSON serialization of C<value>.
Dies if C<value>, or any value nested inside it, is not one of the
supported types listed above.
=head1 COPYRIGHT AND LICENCE
B<< json/canonicalization >> is copyright Toby Inkster.
It is free software; you may redistribute it and/or modify it under
the terms of either the Artistic License 1.0 or the GNU General Public
License version 2.
=cut
from std/data/json import JSON;
from std/math import Math;
from std/string import join, ord, sprint, substr;
// Shared JSON encoder instance; _jcs_number_to_string uses it to format numbers.
let _jcs_json := new JSON();
// Lowercase hexadecimal digits, indexed by digit value (used by _jcs_hex4).
const _JCS_HEX := "0123456789abcdef";
// Formats n as lowercase hexadecimal, left-padded with zeros to at least
// four digits. Values that need more than four digits are not truncated.
function _jcs_hex4 ( Number n ) {
    let digits := "";
    let rest := n;
    // Peel off the least significant hex digit until nothing remains.
    while ( rest > 0 ) {
        digits := substr( _JCS_HEX, rest mod 16, 1 ) _ digits;
        rest := int( rest / 16 );
    }
    // Zero-pad on the left; this also turns an empty result into "0000".
    while ( length digits < 4 ) {
        digits := "0" _ digits;
    }
    return digits;
}
// Builds the JSON string literal for text, including the surrounding
// double quotes. The quote and backslash characters are backslash-escaped,
// characters below U+0020 use the short escapes \b \t \n \f \r where one
// exists and \uXXXX otherwise, and every other character is copied as-is.
function _jcs_encode_string ( String text ) {
    let quoted := "\"";
    let total := length text;
    let pos := 0;
    while ( pos < total ) {
        let code := ord( text, pos );
        // Default: the character passes through unchanged.
        let piece := substr( text, pos, 1 );
        if ( piece eq "\"" ) {
            piece := "\\\"";
        }
        else if ( piece eq "\\" ) {
            piece := "\\\\";
        }
        else if ( code < 32 ) {
            // Control character: prefer the two-character shorthand.
            if ( code = 8 ) { piece := "\\b"; }
            else if ( code = 9 ) { piece := "\\t"; }
            else if ( code = 10 ) { piece := "\\n"; }
            else if ( code = 12 ) { piece := "\\f"; }
            else if ( code = 13 ) { piece := "\\r"; }
            else { piece := "\\u" _ _jcs_hex4( code ); }
        }
        quoted _= piece;
        pos++;
    }
    return quoted _ "\"";
}
// Returns floor(log10(v)), i.e. the decimal exponent v would have in
// scientific notation. The only caller in this file passes a positive,
// non-zero magnitude.
function _jcs_floor_log10 ( Number v ) {
    let approx := Math.log10(v);
    // int() truncates toward zero, so a negative, non-integral logarithm
    // has to be moved down one more step to reach the floor.
    let exponent := int(approx);
    if ( approx < 0 and approx != exponent ) {
        exponent := exponent - 1;
    }
    // log10 may land slightly on the wrong side of an exact power of ten;
    // check v against the neighbouring powers and adjust by one if needed.
    if ( v >= Math.pow(10, exponent + 1) ) {
        return exponent + 1;
    }
    if ( v < Math.pow(10, exponent) ) {
        return exponent - 1;
    }
    return exponent;
}
// Serializes a Number for canonical JSON output (RFC 8785 §3.2.2.3, which
// follows the ECMAScript Number-to-String rules).
//
// Zero (the test below also matches negative zero) is emitted as "0".
// NaN and the infinities have no JSON representation, so they raise an
// error instead of being emitted. Every other value is formatted by a
// strategy chosen from __system__{runtime}.
//
// Dies if value is NaN or infinite.
function _jcs_number_to_string ( Number value ) {
    // RFC 8785 requires NaN and Infinity to abort canonicalization. NaN is
    // the only value that is unequal to itself, and an infinity minus itself
    // is NaN, so this one probe rejects all three while finite numbers
    // (whose difference is 0) pass through. Without it, NaN would satisfy
    // the zero test below and be serialized as "0".
    let probe := value - value;
    if ( probe != probe ) {
        die "jcs_canonicalize: NaN and Infinity cannot be represented in JSON";
    }
    // Neither positive nor negative: the value is zero.
    if ( not (value > 0) and not (value < 0) ) { return "0"; }
    let runtime := __system__{runtime};
    // Perl runtime: format with 16 significant digits.
    // NOTE(review): assuming sprint follows C printf, %.16g is not always the
    // shortest round-trip form and writes exponents such as "1e-07" rather
    // than the "1e-7" ECMAScript produces; confirm against the RFC 8785
    // number test vectors.
    if ( runtime eq "Zuzu::Runtime" ) {
        return sprint("%.16g", value);
    }
    // JS: native String() is already ES2019-correct.
    if ( runtime eq "zuzu-js" ) {
        return "" _ value;
    }
    // Rust: Ryū is correct for floats, but very large/small values need manual
    // scientific notation because the JSON encoder emits integer form for e >= 21.
    let sign := "";
    let abs_v := value;
    if ( value < 0 ) {
        sign := "-";
        abs_v := 0 - value;
    }
    let e := _jcs_floor_log10(abs_v);
    // ECMAScript switches to exponent notation at 1e21 and above, and
    // below 1e-6.
    if ( e >= 21 or e <= -7 ) {
        // Scale the magnitude into [1, 10) and let the encoder print it.
        let mantissa := abs_v / Math.pow(10, e);
        let ms := _jcs_json.encode(mantissa);
        // Positive exponents carry an explicit "+"; negative ones already
        // bring their own "-" when concatenated.
        let exp_part := e >= 0 ? ("e+" _ e) : ("e" _ e);
        return sign _ ms _ exp_part;
    }
    return _jcs_json.encode(value);
}
// Orders two strings by UTF-16 code unit, as RFC 8785 §3.2.3 requires for
// object keys. Returns -1 if a sorts first, 1 if b sorts first, and 0 if
// the strings are identical.
function _jcs_cmp ( String a, String b ) {
    let len_a := length a;
    let len_b := length b;
    let shared := len_a;
    if ( len_b < len_a ) {
        shared := len_b;
    }
    let pos := 0;
    while ( pos < shared ) {
        let ca := ord(a, pos);
        let cb := ord(b, pos);
        if ( ca != cb ) {
            // A code point of 65536 or above is represented by the high
            // surrogate it would have in UTF-16; smaller ones stand for
            // themselves.
            let lead_a := ca;
            if ( ca >= 65536 ) {
                lead_a := 55296 + int( (ca - 65536) / 1024 );
            }
            let lead_b := cb;
            if ( cb >= 65536 ) {
                lead_b := 55296 + int( (cb - 65536) / 1024 );
            }
            if ( lead_a < lead_b ) { return -1; }
            if ( lead_a > lead_b ) { return 1; }
            // Leading units tie, so the low surrogates decide.
            let trail_a := 56320 + (ca - 65536) mod 1024;
            let trail_b := 56320 + (cb - 65536) mod 1024;
            if ( trail_a < trail_b ) { return -1; }
            return 1;
        }
        pos++;
    }
    // One string is a prefix of the other: the shorter one sorts first.
    if ( len_a < len_b ) { return -1; }
    if ( len_a > len_b ) { return 1; }
    return 0;
}
// Returns the distinct keys of a PairList, in order of first appearance.
function _jcs_pairlist_keys ( PairList obj ) {
    let unique := [];
    let visited := {};
    for ( let entry in obj.to_Array() ) {
        let name := entry.key;
        // Record each key only the first time it is encountered.
        if ( not (visited.exists( name )) ) {
            visited.set( name, true );
            unique.push( name );
        }
    }
    return unique;
}
// Looks up key in a PairList and returns the value of the first pair whose
// key matches, or null when no pair has that key.
function _jcs_pairlist_get ( PairList obj, String key ) {
    for ( let entry in obj.to_Array() ) {
        if ( entry.key eq key ) {
            return entry.value;
        }
    }
    return null;
}
// Returns the canonical JSON text for value.
//
// null, Booleans, Numbers and Strings become the matching JSON literal.
// Arrays are serialized element by element in their existing order.
// PairLists and Dicts become JSON objects whose members are ordered by
// _jcs_cmp; for a PairList with repeated keys the first pair is used.
// No whitespace is emitted between tokens.
//
// Dies if value (or anything nested in it) is of any other type.
function jcs_canonicalize ( value ) {
    // Scalars map directly onto JSON literals.
    if ( value == null ) { return "null"; }
    if ( value instanceof Boolean ) {
        if ( value ) { return "true"; }
        return "false";
    }
    if ( value instanceof Number ) { return _jcs_number_to_string(value); }
    if ( value instanceof String ) { return _jcs_encode_string( value ); }

    // Arrays: comma-separated elements, original order preserved.
    if ( value instanceof Array ) {
        let body := "";
        let sep := "";
        for ( let element in value ) {
            body _= sep _ jcs_canonicalize( element );
            sep := ",";
        }
        return "[" _ body _ "]";
    }

    // PairLists: de-duplicate the keys, sort them, then look each one up.
    if ( value instanceof PairList ) {
        let names := _jcs_pairlist_keys( value );
        let ordered := names.sort( fn ( a, b ) -> _jcs_cmp(a, b) );
        let body := "";
        let sep := "";
        for ( let name in ordered ) {
            let member := _jcs_pairlist_get( value, name );
            body _= sep _ _jcs_encode_string( name ) _ ":" _ jcs_canonicalize( member );
            sep := ",";
        }
        return "{" _ body _ "}";
    }

    // Dicts: same output shape, keys taken straight from the Dict.
    if ( value instanceof Dict ) {
        let ordered := value.keys().sort( fn ( a, b ) -> _jcs_cmp(a, b) );
        let body := "";
        let sep := "";
        for ( let name in ordered ) {
            body _= sep _ _jcs_encode_string( name ) _ ":" _ jcs_canonicalize( value.get( name ) );
            sep := ",";
        }
        return "{" _ body _ "}";
    }

    die "jcs_canonicalize: unsupported value type (" _ typeof value _ ")";
}
modules/json/canonicalization.zzm
json-canonicalization-0.0.1 source code
Package
- Name
- json-canonicalization
- Version
- 0.0.1
- Uploaded
- 2026-06-09 22:34:02
- Repository
- https://github.com/tobyink/zuzu-json-canonicalization
- Dependencies
- std/data/json >= 0
- std/math >= 0
- std/string >= 0
- Metadata
- zuzu-distribution.json
- Archive
- Download .tar.gz