modules/json/canonicalization.zzm

json-canonicalization-0.0.1 source code

Package

Name
json-canonicalization
Version
0.0.1
Uploaded
2026-06-09 22:34:02
Repository
https://github.com/tobyink/zuzu-json-canonicalization
Dependencies
Metadata
zuzu-distribution.json
Archive
Download .tar.gz
=encoding utf8

=head1 NAME

json/canonicalization - JSON Canonicalization Scheme (RFC 8785).

=head1 SYNOPSIS

  from json/canonicalization import jcs_canonicalize;

  say( jcs_canonicalize( { "b": 2, "a": 1 } ) );   // {"a":1,"b":2}
  say( jcs_canonicalize( [null, true, 1.5] ) );    // [null,true,1.5]

=head1 DESCRIPTION

Implements the JSON Canonicalization Scheme defined by RFC 8785.

The single exported function C<jcs_canonicalize> accepts any
JSON-compatible ZuzuScript value (null, Boolean, Number, String,
Array, or Dict/PairList) and returns its canonical JSON string
representation.

Key properties of the canonical form:

=over

=item * Object keys are sorted by UTF-16 code unit order.

=item * No whitespace between tokens.

=item * Numbers use IEEE 754 / ES2019 C<Number::toString()> serialization.

=item * Strings escape C<"> as C<\"> and C<\> as C<\\>, and use the shorthand
escapes C<\b \t \n \f \r>; all other control characters (U+0000–U+001F) use
lowercase-hex C<\uXXXX>.

=back

=head1 EXPORTS

=head2 C<< jcs_canonicalize(value) -> String >>

Returns the RFC 8785 canonical JSON serialization of C<value>. Dies if
C<value>, or any value nested inside it, is not one of the supported types.

=head1 COPYRIGHT AND LICENCE

B<< json/canonicalization >> is copyright Toby Inkster.

It is free software; you may redistribute it and/or modify it under
the terms of either the Artistic License 1.0 or the GNU General Public
License version 2.

=cut

from std/data/json import JSON;
from std/math import Math;
from std/string import join, ord, sprint, substr;

// Shared JSON encoder instance; _jcs_number_to_string calls its encode()
// method to format numbers on the fallback (non-Perl, non-JS) path.
let _jcs_json := new JSON();

// Lowercase hexadecimal digits, indexed by digit value (0-15); used by
// _jcs_hex4 to build \uXXXX escapes.
const _JCS_HEX := "0123456789abcdef";

// Formats n as lowercase hexadecimal, left-padded with zeros to a minimum
// width of four digits.
//
// n: the number to format; values that are not greater than zero produce
//    "0000", and values above 0xFFFF produce more than four digits.
// Returns the hexadecimal digits as a String.
function _jcs_hex4 ( Number n ) {
	let digits := "";
	let rest := n;
	// Peel off the least significant hex digit each round, prepending it so
	// the most significant digit ends up first.
	while ( rest > 0 ) {
		digits := substr( _JCS_HEX, rest mod 16, 1 ) _ digits;
		rest := int( rest / 16 );
	}
	// Zero-pad on the left; an empty result (n not above zero) becomes "0000".
	while ( length digits < 4 ) {
		digits := "0" _ digits;
	}
	return digits;
}

// Builds the double-quoted JSON string literal for text.
//
// The quote and backslash characters are backslash-escaped, the control
// characters with codes 8, 9, 10, 12 and 13 use the shorthand escapes
// \b \t \n \f \r, any other character with a code below 32 becomes a
// \u escape with four lowercase hex digits, and everything else is copied
// through unchanged.
//
// text: the string to encode.
// Returns the encoded literal, including the surrounding quotes.
function _jcs_encode_string ( String text ) {
	let total := length text;
	let quoted := "\"";
	let pos := 0;
	while ( pos < total ) {
		let unit := substr( text, pos, 1 );
		let cp := ord( text, pos );
		// Default: the character is emitted as-is.
		let piece := unit;
		if ( unit eq "\"" ) {
			piece := "\\\"";
		}
		else if ( unit eq "\\" ) {
			piece := "\\\\";
		}
		else if ( cp < 32 ) {
			// Control characters: shorthand escape where one exists,
			// otherwise the generic \uXXXX form.
			if ( cp = 8 ) {
				piece := "\\b";
			}
			else if ( cp = 9 ) {
				piece := "\\t";
			}
			else if ( cp = 10 ) {
				piece := "\\n";
			}
			else if ( cp = 12 ) {
				piece := "\\f";
			}
			else if ( cp = 13 ) {
				piece := "\\r";
			}
			else {
				piece := "\\u" _ _jcs_hex4( cp );
			}
		}
		quoted _= piece;
		pos++;
	}
	return quoted _ "\"";
}

// Computes the decimal exponent of v, i.e. floor(log10(v)).
//
// v: the number whose exponent is wanted; the caller in this file passes
//    an absolute value that is never zero.
// Returns the exponent as an integer-valued Number.
function _jcs_floor_log10 ( Number v ) {
	let approx := Math.log10(v);
	let exponent := int(approx);
	// int() truncates toward zero, so a negative non-integer logarithm has
	// been rounded up and must be stepped down to obtain the floor.
	if ( approx < 0 and approx != exponent ) {
		exponent := exponent - 1;
	}
	// log10 may be off by one near powers of ten; check the candidate
	// against the actual powers and nudge it by one step if needed.
	if ( v >= Math.pow(10, exponent + 1) ) {
		exponent := exponent + 1;
	}
	else if ( v < Math.pow(10, exponent) ) {
		exponent := exponent - 1;
	}
	return exponent;
}

// Serializes a Number the way RFC 8785 §3.2.2.3 requires, i.e. following
// the ES2019 Number::toString rules.
//
// value: the number to serialize.
// Returns the decimal text; zero (including negative zero) is always "0".
// Dies if value is NaN or ±Infinity: JSON cannot represent them and
// RFC 8785 requires a canonicalizer to fail rather than emit something.
function _jcs_number_to_string ( Number value ) {
	// value - value is 0 for every finite number, but NaN for NaN and for
	// ±Infinity, and NaN never compares equal to 0. This has to run before
	// the zero test below, which NaN would otherwise satisfy.
	if ( (value - value) != 0 ) {
		die "jcs_canonicalize: NaN and Infinity cannot be represented in canonical JSON";
	}

	// Neither positive nor negative: 0 or -0, both of which serialize as "0".
	if ( not (value > 0) and not (value < 0) ) { return "0"; }

	let runtime := __system__{runtime};

	// Perl: format with 16 significant digits via sprint.
	// NOTE(review): if sprint follows C printf %g rules, exponents are
	// zero-padded to two digits (1e-07) and exponent form is chosen on
	// different thresholds than ES2019, and 16 digits is not always the
	// shortest round-trip form — TODO confirm against the RFC 8785 test
	// vectors.
	if ( runtime eq "Zuzu::Runtime" ) {
		return sprint("%.16g", value);
	}

	// JS: native String() is already ES2019-correct.
	if ( runtime eq "zuzu-js" ) {
		return "" _ value;
	}

	// Rust: Ryū is correct for floats, but very large/small values need manual
	// scientific notation because the JSON encoder emits integer form for e >= 21.
	let sign := "";
	let abs_v := value;
	if ( value < 0 ) {
		sign := "-";
		abs_v := 0 - value;
	}

	let e := _jcs_floor_log10(abs_v);

	// ES2019 switches to exponent notation for magnitudes of 1e21 and above
	// and for magnitudes below 1e-6.
	if ( e >= 21 or e <= -7 ) {
		let mantissa := abs_v / Math.pow(10, e);
		let ms := _jcs_json.encode(mantissa);
		// Positive exponents carry an explicit "+"; negative ones already
		// bring their own "-" when concatenated.
		let exp_part := e >= 0 ? ("e+" _ e) : ("e" _ e);
		return sign _ ms _ exp_part;
	}

	return _jcs_json.encode(value);
}

// Orders two strings by UTF-16 code unit, as RFC 8785 §3.2.3 requires for
// object keys.
//
// a, b: the strings to compare.
// Returns -1 when a sorts first, 1 when b sorts first, and 0 when they
// are equal.
function _jcs_cmp ( String a, String b ) {
	let len_a := length a;
	let len_b := length b;
	let shared := len_b;
	if ( len_a < len_b ) {
		shared := len_a;
	}

	let pos := 0;
	while ( pos < shared ) {
		let cp_a := ord(a, pos);
		let cp_b := ord(b, pos);
		if ( cp_a != cp_b ) {
			// A code of 65536 or more lies outside the BMP and is two UTF-16
			// units; its first unit is the high surrogate computed here.
			let hi_a := cp_a;
			let hi_b := cp_b;
			if ( cp_a >= 65536 ) {
				hi_a := 55296 + int( (cp_a - 65536) / 1024 );
			}
			if ( cp_b >= 65536 ) {
				hi_b := 55296 + int( (cp_b - 65536) / 1024 );
			}
			if ( hi_a < hi_b ) { return -1; }
			if ( hi_a > hi_b ) { return 1; }

			// Leading units tie, so the low surrogates decide the order.
			let lo_a := 56320 + (cp_a - 65536) mod 1024;
			let lo_b := 56320 + (cp_b - 65536) mod 1024;
			if ( lo_a < lo_b ) { return -1; }
			return 1;
		}
		pos++;
	}

	// One string is a prefix of the other (or they are equal): shorter first.
	if ( len_a < len_b ) { return -1; }
	if ( len_a > len_b ) { return 1; }
	return 0;
}

// Lists the distinct keys of a PairList in order of first appearance.
//
// obj: the PairList to inspect.
// Returns an Array holding each key once; later duplicates are skipped.
function _jcs_pairlist_keys ( PairList obj ) {
	let known := {};
	let ordered := [];
	for ( let entry in obj.to_Array() ) {
		let k := entry.key;
		// Record a key only the first time it is encountered.
		if ( not ( known.exists( k ) ) ) {
			known.set( k, true );
			ordered.push( k );
		}
	}
	return ordered;
}

// Looks up the value stored under key in a PairList.
//
// obj: the PairList to search.
// key: the key to look for, compared as a string.
// Returns the value of the first pair whose key matches, or null when no
// pair matches.
function _jcs_pairlist_get ( PairList obj, String key ) {
	for ( let entry in obj.to_Array() ) {
		if ( entry.key eq key ) {
			return entry.value;
		}
	}
	return null;
}

// Serializes a JSON-compatible value to its RFC 8785 canonical JSON text.
//
// value: null, a Boolean, Number, String, Array, PairList or Dict.
//        Arrays and objects are processed recursively; object members are
//        emitted sorted by key using _jcs_cmp, with no whitespace.
//        For a PairList containing a key more than once, the first
//        occurrence's value is used.
// Returns the canonical serialization as a String.
// Dies with "jcs_canonicalize: unsupported value type (...)" for any other
// kind of value.
function jcs_canonicalize ( value ) {
	if ( value == null ) {
		return "null";
	}
	if ( value instanceof Boolean ) {
		return value ? "true" : "false";
	}
	if ( value instanceof Number ) {
		return _jcs_number_to_string(value);
	}
	if ( value instanceof String ) {
		return _jcs_encode_string( value );
	}
	if ( value instanceof Array ) {
		let parts := [];
		for ( let item in value ) {
			parts.push( jcs_canonicalize( item ) );
		}
		return "[" _ join( ",", parts ) _ "]";
	}
	if ( value instanceof PairList ) {
		// Walk the pairs once, remembering the first value seen for each
		// key. Looking values up through a Dict avoids re-scanning the whole
		// pair list for every key, which made this branch quadratic.
		let first_value := {};
		let names := [];
		for ( let pair in value.to_Array() ) {
			next if first_value.exists( pair.key );
			first_value.set( pair.key, pair.value );
			names.push( pair.key );
		}
		let keys := names.sort( fn ( a, b ) -> _jcs_cmp(a, b) );
		let parts := [];
		for ( let key in keys ) {
			parts.push( _jcs_encode_string( key ) _ ":" _ jcs_canonicalize( first_value.get( key ) ) );
		}
		return "{" _ join( ",", parts ) _ "}";
	}
	if ( value instanceof Dict ) {
		let keys := value.keys().sort( fn ( a, b ) -> _jcs_cmp(a, b) );
		let parts := [];
		for ( let key in keys ) {
			parts.push( _jcs_encode_string( key ) _ ":" _ jcs_canonicalize( value.get( key ) ) );
		}
		return "{" _ join( ",", parts ) _ "}";
	}
	die "jcs_canonicalize: unsupported value type (" _ typeof value _ ")";
}