[tarent-useful-scripts SCM] git branch master updated. e17163defcf9c600f62bd5a89ebf7495231713b4
mirabilos at evolvis.org
mirabilos at evolvis.org
Wed Feb 28 23:29:54 CET 2018 • <20180228222954.AB2EF4A01A0@evolvis.org>
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Main git repository for Evolvis project useful-scripts".
The branch, master, has been updated
via e17163defcf9c600f62bd5a89ebf7495231713b4 (commit)
from 07ad39670becf5822f5e51e2ca4a1d36fad977ce (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
- Log -----------------------------------------------------------------
commit e17163defcf9c600f62bd5a89ebf7495231713b4
Author: mirabilos <m at mirbsd.org>
Date: Wed Feb 28 22:30:14 2018 +0000
update from h-p-w master copy
-----------------------------------------------------------------------
Summary of changes:
misc/minijson.php | 1260 +++++++++++++++++++++++++++++------------------------
1 file changed, 683 insertions(+), 577 deletions(-)
diff --git a/misc/minijson.php b/misc/minijson.php
index 6124d8d..bfb5310 100644
--- a/misc/minijson.php
+++ b/misc/minijson.php
@@ -1,7 +1,9 @@
<?php
-if (count(get_included_files()) === 1) define('__main__', __FILE__);
+if (count(get_included_files()) <= 1 && !defined('__main__'))
+ define('__main__', __FILE__);
/**
- * Minimal complete JSON generator and parser for FusionForge and SimKolab
+ * Minimal complete JSON generator and parser for FusionForge/Evolvis
+ * and SimKolab, including for debugging output serialisation
*
* Copyright © 2010, 2011, 2012, 2014, 2016, 2017
* mirabilos <t.glaser at tarent.de>
@@ -22,8 +24,8 @@ if (count(get_included_files()) === 1) define('__main__', __FILE__);
* of said person’s immediate fault when using the work as intended.
*-
* Do *not* use PHP’s json_encode because it is broken.
- * Note that JSON is case-sensitive. My notes are at:
- * https://www.mirbsd.org/cvs.cgi/contrib/hosted/tg/json.txt?rev=HEAD
+ * Note that JSON is case-sensitive and not binary-safe. My notes at:
+ * http://www.mirbsd.org/cvs.cgi/contrib/hosted/tg/code/MirJSON/json.txt?rev=HEAD
*
* Call as CLI script to filter input as JSON pretty-printer. Options
* are -c (compact output, no indentation or spaces), -d depth (parse
@@ -32,689 +34,793 @@ if (count(get_included_files()) === 1) define('__main__', __FILE__);
*/
/*-
- * I was really, really bad at writing parsers. I still am really bad at
- * writing parsers.
- * -- Rasmus Lerdorf
+ * I was really, really bad at writing parsers.
+ * I still am really bad at writing parsers.
+ * -- Rasmus Lerdorf
*/
/**
- * Encodes an array (indexed or associative) as JSON.
+ * Encodes an array (indexed or associative) or any value as JSON.
+ * See minijson_encode_ob_string() for limitations on strings;
+ * strings not encoded in UTF-8 and resources may not round-trip.
*
- * in: array x (Value to be encoded)
- * in: string indent or bool false to skip beautification
+ * in: array x (Value to be encoded)
+ * in: string (optional) or bool false to skip beautification (default: '')
* in: integer (optional) recursion depth (default: 32)
- * out: string encoded
+ * in: integer (optional) truncation size (default 0 to not truncate),
+ * makes output invalid JSON
+ * in: bool (optional) whether to pretty-print resources (default: false)
+ * out: string encoded
*/
-function minijson_encode($x, $ri='', $depth=32) {
- return (minijson_encode_internal($x, $ri, $depth, 0, false));
+function minijson_encode($x, $ri='', $depth=32, $truncsz=0, $dumprsrc=false) {
+ ob_start();
+ minijson_encode_ob($x, $ri, $depth, $truncsz, $dumprsrc);
+ return ob_get_clean();
}
/**
- * Encodes content as JSON for debugging (not round-trip safe).
+ * Encodes a string as JSON. NUL terminates strings; strings
+ * not comprised of only valid UTF-8 are interpreted as latin1.
*
- * in: array x (Value to be encoded)
- * in: string indent or bool false to skip beautification
+ * in: string x (Value to be encoded)
+ * in: integer (optional) truncation size (default 0 to not truncate),
+ * makes output invalid JSON
+ * out: stdout encoded
+ */
+function minijson_encode_ob_string($x, $truncsz=0) {
+ if (!is_string($x))
+ $x = strval($x);
+
+ $Sx = strlen($x);
+
+ if ($truncsz && ($Sx > $truncsz)) {
+ echo 'TOO_LONG_STRING_TRUNCATED:';
+ $Sx = $truncsz;
+ }
+ echo '"';
+
+ /* assume UTF-8 first, for sanity */
+ ob_start(); /* in case a restart is needed */
+
+ $Sp = 0;
+ minijson_encode_string_utf8:
+ if ($Sp >= $Sx) {
+ ob_end_flush();
+ echo '"';
+ return;
+ }
+
+ /* read next octet */
+ $c = ord(($ch = $x[$Sp++]));
+
+ if ($c === 0x5C) {
+ /* just backslash */
+ echo "\\\\";
+ goto minijson_encode_string_utf8;
+ }
+
+ if ($c > 0x22 && $c < 0x7F) {
+ /* printable ASCII except space, !, " and backslash */
+ echo $ch;
+ goto minijson_encode_string_utf8;
+ }
+
+ if ($c < 0x80) {
+ /* C0 control character, space, !, " or DEL */
+ if (($c & 0x7E) === 0x20)
+ echo $ch;
+ elseif ($c === 0x22)
+ echo '\"';
+ elseif ($c === 0x08)
+ echo '\b';
+ elseif ($c === 0x09)
+ echo '\t';
+ elseif ($c === 0x0A)
+ echo '\n';
+ elseif ($c === 0x0C)
+ echo '\f';
+ elseif ($c === 0x0D)
+ echo '\r';
+ elseif (!$c)
+ $Sp = $Sx;
+ else
+ printf('\u%04X', $c);
+ goto minijson_encode_string_utf8;
+ }
+
+ /* UTF-8 lead byte */
+ if ($c < 0xC2 || $c >= 0xF8) {
+ goto minijson_encode_string_latin1;
+ } elseif ($c < 0xE0) {
+ $wc = ($c & 0x1F) << 6;
+ $wmin = 0x80;
+ $Ss = 1;
+ } elseif ($c < 0xF0) {
+ $wc = ($c & 0x0F) << 12;
+ $wmin = 0x800;
+ $Ss = 2;
+ } else {
+ $wc = ($c & 0x07) << 18;
+ $wmin = 0x10000;
+ $Ss = 3;
+ }
+ /* UTF-8 trail bytes */
+ if ($Sp + $Ss > $Sx)
+ goto minijson_encode_string_latin1;
+ while ($Ss--)
+ if (($c = ord($x[$Sp++]) ^ 0x80) <= 0x3F)
+ $wc |= $c << (6 * $Ss);
+ else
+ goto minijson_encode_string_latin1;
+ /* complete wide character */
+ if ($wc < $wmin)
+ goto minijson_encode_string_latin1;
+
+ if ($wc < 0x00A0)
+ printf('\u%04X', $wc);
+ elseif ($wc < 0x0800)
+ echo chr(0xC0 | ($wc >> 6)) .
+ chr(0x80 | ($wc & 0x3F));
+ elseif ($wc > 0xFFFD || ($wc >= 0xD800 && $wc <= 0xDFFF) ||
+ ($wc >= 0x2028 && $wc <= 0x2029)) {
+ if ($wc > 0xFFFF) {
+ if ($wc > 0x10FFFF)
+ goto minijson_encode_string_latin1;
+ /* UTF-16 */
+ $wc -= 0x10000;
+ printf('\u%04X\u%04X',
+ 0xD800 | ($wc >> 10),
+ 0xDC00 | ($wc & 0x03FF));
+ } else
+ printf('\u%04X', $wc);
+ } else
+ echo chr(0xE0 | ($wc >> 12)) .
+ chr(0x80 | (($wc >> 6) & 0x3F)) .
+ chr(0x80 | ($wc & 0x3F));
+
+ /* process next char */
+ goto minijson_encode_string_utf8;
+
+ minijson_encode_string_latin1:
+ /* failed, interpret as sorta latin1 but display only ASCII */
+ ob_end_clean();
+
+ $Sp = 0;
+ while ($Sp < $Sx && ($c = ord(($ch = $x[$Sp++])))) {
+ /* similar logic as above, just not as golfed for speed */
+ if ($c >= 0x20 && $c < 0x7F) {
+ if ($c === 0x22 || $c === 0x5C)
+ echo "\\" . $ch;
+ else
+ echo $ch;
+ } else switch ($c) {
+ case 0x08:
+ echo '\b';
+ break;
+ case 0x09:
+ echo '\t';
+ break;
+ case 0x0A:
+ echo '\n';
+ break;
+ case 0x0C:
+ echo '\f';
+ break;
+ case 0x0D:
+ echo '\r';
+ break;
+ default:
+ printf('\u%04X', $c);
+ break;
+ }
+ }
+ echo '"';
+}
+
+/**
+ * Encodes a value as JSON to the currently active output buffer.
+ * See minijson_encode() for details.
+ *
+ * in: array x (Value to be encoded)
+ * in: string indent or bool false to skip beautification
* in: integer recursion depth
- * in: integer truncation size (0 to not truncate), makes output not JSON
- * in: bool whether to pretty-print resources as strings
- * out: string encoded
+ * in: integer truncation size (0 to not truncate), makes output not JSON
+ * in: bool whether to pretty-print resources
+ * out: stdout encoded
*/
-function minijson_encode_internal($x, $ri, $depth, $truncsz, $dumprsrc) {
+function minijson_encode_ob($x, $ri, $depth, $truncsz, $dumprsrc) {
if (!$depth-- || !isset($x) || is_null($x) || (is_float($x) &&
- (is_nan($x) || is_infinite($x))))
- return 'null';
- if ($x === true)
- return 'true';
- if ($x === false)
- return 'false';
+ (is_nan($x) || is_infinite($x)))) {
+ echo 'null';
+ return;
+ }
+
+ if ($x === true) {
+ echo 'true';
+ return;
+ }
+ if ($x === false) {
+ echo 'false';
+ return;
+ }
+
if (is_int($x)) {
$y = (int)$x;
$z = strval($y);
- if ($x == $z)
- return $z;
- $x = strval($x);
+ if (strval($x) === $z) {
+ echo $z;
+ return;
+ }
+ goto minijson_encode_number;
}
+
if (is_float($x)) {
+ minijson_encode_number:
$rs = sprintf('%.14e', $x);
$v = explode('e', $rs);
$rs = rtrim($v[0], '0');
- if (substr($rs, -1) == '.') {
+ if (substr($rs, -1) === '.')
$rs .= '0';
- }
- if ($v[1] != '-0' && $v[1] != '+0') {
+ if ($v[1] !== '-0' && $v[1] !== '+0')
$rs .= 'E' . $v[1];
- }
- return $rs;
+ echo $rs;
+ return;
}
- if (is_string($x)) {
- $rs = '"';
- if ($truncsz && (strlen($x) > $truncsz)) {
- /* truncate very long texts */
- $rs = 'TOO_LONG_STRING_TRUNCATED:"';
- $x = substr($x, 0, $truncsz);
- }
+ /* strings or unknown scalars */
+ if (is_string($x) ||
+ (!is_array($x) && !is_object($x) && is_scalar($x))) {
+ minijson_encode_ob_string($x, $truncsz);
+ return;
+ }
- $x .= "\0";
- /*
- * A bit unbelievable: not only does mb_check_encoding
- * not exist from the start, but also does it not check
- * reliably — so converting forth and back is the way
- * they recommend… also, JSON is not binary-safe either…
- */
- $isunicode = false;
- $mb_encoding = false;
- if (function_exists('mb_internal_encoding') &&
- function_exists('mb_convert_encoding')) {
- $mb_encoding = mb_internal_encoding();
- mb_internal_encoding('UTF-8');
- $z = mb_convert_encoding($x, 'UTF-16LE', 'UTF-8');
- $y = mb_convert_encoding($z, 'UTF-8', 'UTF-16LE');
- $isunicode = ($y == $x);
- }
- if ($isunicode) {
- $z = str_split($z, 2);
- } else {
- $z = str_split($x);
- }
+ /* arrays, objects, resources, unknown non-scalars */
- foreach ($z as $v) {
- $y = ord($v[0]);
- if ($isunicode) {
- $y |= ord($v[1]) << 8;
- }
- if ($y == 0) {
- break;
- } elseif ($y == 8) {
- $rs .= '\b';
- } elseif ($y == 9) {
- $rs .= '\t';
- } elseif ($y == 10) {
- $rs .= '\n';
- } elseif ($y == 12) {
- $rs .= '\f';
- } elseif ($y == 13) {
- $rs .= '\r';
- } elseif ($y == 34) {
- $rs .= '\"';
- } elseif ($y == 92) {
- $rs .= '\\\\';
- } elseif ($y < 0x20 || $y > 0xFFFD ||
- ($y >= 0xD800 && $y <= 0xDFFF) ||
- ($y > 0x7E && (!$isunicode || $y < 0xA0))) {
- $rs .= sprintf('\u%04X', $y);
- } elseif ($isunicode && $y > 0x7E) {
- $rs .= mb_convert_encoding($v, 'UTF-8',
- 'UTF-16LE');
- } else {
- $rs .= $v[0];
- }
- }
- if ($mb_encoding !== false) {
- mb_internal_encoding($mb_encoding);
- }
- return $rs.'"';
+ if ($ri === false) {
+ $si = false;
+ $xi = '';
+ $xr = '';
+ $Sd = ':';
+ } else {
+ $si = $ri . ' ';
+ $xi = "\n" . $si;
+ $xr = "\n" . $ri;
+ $Sd = ': ';
}
- if (is_array($x)) {
- $k = array_keys($x);
- if (!$k) {
- return '[]';
- }
+ $Si = ',' . $xi;
- $isnum = true;
- foreach ($k as $v) {
- if (is_int($v)) {
- $y = (int)$v;
- $z = strval($y);
- if ($v != $z) {
- $isnum = false;
- break;
- }
- } else {
- $isnum = false;
- break;
+ /* arrays, potentially empty or non-associative */
+ if (is_array($x)) {
+ if (!($n = count($x))) {
+ echo '[]';
+ return;
+ }
+ ob_start();
+ echo '[';
+ for ($v = 0; $v < $n; ++$v) {
+ if (!array_key_exists($v, $x)) {
+ /* failed — sparse or associative */
+ ob_end_clean();
+ goto minijson_encode_object;
}
- }
+ echo $xi;
+ minijson_encode_ob($x[$v],
+ $si, $depth, $truncsz, $dumprsrc);
+ $xi = $Si;
+ }
+ ob_end_flush();
+ echo $xr . ']';
+ return;
+ }
- if ($isnum) {
- /* all array keys are integers */
- $s = $k;
- sort($s, SORT_NUMERIC);
- /* test keys for order and delta */
- $y = 0;
- foreach ($s as $v) {
- if ($v != $y) {
- $isnum = false;
- break;
- }
- $y++;
- }
- }
+ /* http://de2.php.net/manual/en/function.is-resource.php#103942 */
+ if (!is_object($x) && !is_null($rsrctype = @get_resource_type($x))) {
+ if (!$dumprsrc) {
+ minijson_encode_ob_string($x, $truncsz);
+ return;
+ }
+ $rs = array(
+ '_strval' => strval($x),
+ '_type' => $rsrctype,
+ );
+ switch ($rsrctype) {
+ case 'stream':
+ $rs['info'] = stream_get_meta_data($x);
+ break;
+ case 'curl':
+ $rs['info'] = curl_getinfo($x);
+ $rs['private'] = curl_getinfo($x, CURLINFO_PRIVATE);
+ break;
+ case 'GMP integer':
+ $rs['value'] = gmp_strval($x);
+ break;
+ case 'OpenSSL key':
+ $rs['info'] = openssl_pkey_get_details($x);
+ break;
+ case 'pgsql link':
+ case 'pgsql link persistent':
+ $rs['err'] = pg_last_error($x); // must be first
+ $rs['db'] = pg_dbname($x);
+ $rs['host'] = pg_host($x);
+ $rs['status'] = pg_connection_status($x);
+ $rs['txn'] = pg_transaction_status($x);
+ break;
+ case 'pgsql result':
+ $rs['status'] = pg_result_status($x, PGSQL_STATUS_STRING);
+ break;
+ }
+ echo '{' . $xi . '"\u0000resource"' . $Sd;
+ minijson_encode_ob($rs, $si, $depth + 1, $truncsz, $dumprsrc);
+ echo $xr . '}';
+ return;
+ }
- $si = $ri === false ? false : $ri . ' ';
- $first = true;
- if ($isnum) {
- /* all array keys are integers 0‥n */
- $rs = '[';
- if ($ri !== false)
- $rs .= "\n";
- foreach ($s as $v) {
- if ($first)
- $first = false;
- elseif ($ri === false)
- $rs .= ',';
- else
- $rs .= ",\n";
- if ($si !== false)
- $rs .= $si;
- $rs .= minijson_encode_internal($x[$v], $si,
- $depth, $truncsz, $dumprsrc);
- }
- if ($ri !== false)
- $rs .= "\n" . $ri;
- $rs .= ']';
- return $rs;
- }
+ /* treat everything else as Object */
- $rs = '{';
- if ($ri !== false)
- $rs .= "\n";
- sort($k, SORT_STRING);
- foreach ($k as $v) {
- if ($first)
- $first = false;
- elseif ($ri === false)
- $rs .= ',';
- else
- $rs .= ",\n";
- if ($si !== false)
- $rs .= $si;
- $rs .= minijson_encode_internal(strval($v), false,
- $depth, $truncsz, $dumprsrc);
- if ($ri === false)
- $rs .= ':';
- else
- $rs .= ': ';
- $rs .= minijson_encode_internal($x[$v], $si,
- $depth, $truncsz, $dumprsrc);
- }
- if ($ri !== false)
- $rs .= "\n" . $ri;
- $rs .= '}';
- return $rs;
- }
- if (is_object($x)) {
- /* PHP objects are mostly like associative arrays */
- $x = (array)$x;
- $k = array();
- foreach (array_keys($x) as $v) {
- /* protected and private members have NULs there */
- $k[$v] = preg_replace('/^\0([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*|\*)\0(.)/',
- '\\\\$1\\\\$2', $v);
- }
- if (!$k) {
- return '{}';
- }
- $si = $ri === false ? false : $ri . ' ';
- $first = true;
- $rs = '{';
- if ($ri !== false)
- $rs .= "\n";
- asort($k, SORT_STRING);
- foreach ($k as $v => $s) {
- if ($first)
- $first = false;
- elseif ($ri === false)
- $rs .= ',';
- else
- $rs .= ",\n";
- if ($si !== false)
- $rs .= $si;
- $rs .= minijson_encode_internal($s, false,
- $depth, $truncsz, $dumprsrc);
- if ($ri === false)
- $rs .= ':';
- else
- $rs .= ': ';
- $rs .= minijson_encode_internal($x[$v], $si,
- $depth, $truncsz, $dumprsrc);
- }
- if ($ri !== false)
- $rs .= "\n" . $ri;
- $rs .= '}';
- return $rs;
+ /* PHP objects are mostly like associative arrays */
+ if (!($x = (array)$x)) {
+ echo '{}';
+ return;
}
-
- /* http://de2.php.net/manual/en/function.is-resource.php#103942 */
- if ($dumprsrc && !is_null($rsrctype = @get_resource_type($x))) {
- $k = strval($rsrctype);
- $rs = '{';
- if ($ri !== false)
- $rs .= "\n" . $ri . ' ';
- $rs .= '"\u0000resource":';
- if ($ri !== false)
- $rs .= ' ';
- $rs .= minijson_encode_internal($k, false,
- $depth, $truncsz, $dumprsrc);
- if ($ri !== false)
- $rs .= "\n" . $ri;
- $rs .= '}';
- return $rs;
- }
-
- /* treat everything else as array or string */
- return minijson_encode_internal(is_scalar($x) ? strval($x) : (array)$x,
- $ri, $depth, $truncsz, $dumprsrc);
+ minijson_encode_object:
+ $s = array();
+ foreach (array_keys($x) as $k) {
+ $v = $k;
+ if (!is_string($v))
+ $v = strval($v);
+ /* protected and private members have NULs there */
+ if (strpos($v, "\0") !== false)
+ $v = str_replace("\0", "\\", $v);
+ $s[$k] = $v;
+ }
+ asort($s, SORT_STRING);
+ echo '{';
+ foreach ($s as $k => $v) {
+ echo $xi;
+ minijson_encode_ob_string($v, $truncsz);
+ echo $Sd;
+ minijson_encode_ob($x[$k], $si, $depth, $truncsz, $dumprsrc);
+ $xi = $Si;
+ }
+ echo $xr . '}';
}
/**
* Decodes a UTF-8 string from JSON (ECMA 262).
+ * Empty Objects are returned as empty PHP arrays and thus
+ * trip around as empty Arrays.
*
- * in: string json
- * in: reference output-variable (or error string)
+ * in: string JSON text to decode
+ * in: reference output Value (or error string)
* in: integer (optional) recursion depth (default: 32)
- * out: boolean false if an error occured, true = output is valid
+ * out: boolean false if an error occured, true if the output is valid
*/
-function minijson_decode($sj, &$ov, $depth=32) {
- if (!isset($sj) || !$sj) {
+function minijson_decode($s, &$ov, $depth=32) {
+ if (!isset($s))
+ $s = '';
+ elseif (!is_string($s))
+ $s = strval($s);
+
+ $Sp = 0;
+ $Sx = strlen($s);
+ $rv = false;
+
+ /* skip Byte Order Mark if present */
+ if (substr($s, 0, 3) === "\xEF\xBB\xBF")
+ $Sp = 3;
+
+ /* skip leading whitespace */
+ minijson_skip_wsp($s, $Sp, $Sx);
+
+ /* recursively parse input */
+ if ($Sp < $Sx)
+ $rv = minijson_decode_value($s, $Sp, $Sx, $ov, $depth);
+ else
$ov = 'empty input';
- return false;
- }
-
- /* mb_convert_encoding simply must exist for the decoder */
- $mb_encoding = mb_internal_encoding();
- mb_internal_encoding('UTF-8');
-
- /* see note about mb_check_encoding in the JSON encoder… */
- $wj = mb_convert_encoding($sj, 'UTF-16LE', 'UTF-8');
- $mj = mb_convert_encoding($wj, 'UTF-8', 'UTF-16LE');
- $rv = ($mj == $sj);
- unset($sj);
- unset($mj);
-
- if ($rv) {
- /* convert UTF-16LE string to array of wchar_t */
- $j = array();
- foreach (str_split($wj, 2) as $v) {
- $wc = ord($v[0]) | (ord($v[1]) << 8);
- $j[] = $wc;
- }
- $j[] = 0;
- unset($wj);
-
- /* skip Byte Order Mark if present */
- $p = 0;
- if ($j[$p] == 0xFEFF)
- $p++;
-
- /* parse recursively */
- $rv = minijson_decode_value($j, $p, $ov, $depth);
- } else {
- $ov = 'input not valid UTF-8';
- }
+ /* skip trailing whitespace */
if ($rv) {
- /* skip optional whitespace after tokens */
- minijson_skip_wsp($j, $p);
-
+ minijson_skip_wsp($s, $Sp, $Sx);
/* end of string? */
- if ($j[$p] !== 0) {
- /* no, trailing waste */
- $ov = 'expected EOS at wchar #' . $p;
+ if ($Sp < $Sx) {
+ $ov = 'expected EOS';
$rv = false;
}
}
- mb_internal_encoding($mb_encoding);
+ /* amend errors by erroring offset */
+ if (!$rv)
+ $ov = sprintf('%s at offset 0x%0' . strlen(dechex($Sx)) . 'X',
+ $ov, $Sp);
return $rv;
}
-function minijson_skip_wsp(&$j, &$p) {
- /* skip all wide characters that are JSON whitespace */
- do {
- $wc = $j[$p++];
- } while ($wc == 0x09 || $wc == 0x0A || $wc == 0x0D || $wc == 0x20);
- $p--;
-}
-
-function minijson_get_hexdigit(&$j, &$p, &$v, $i) {
- $wc = $j[$p++];
- if ($wc >= 0x30 && $wc <= 0x39) {
- $v += $wc - 0x30;
- } elseif ($wc >= 0x41 && $wc <= 0x46) {
- $v += $wc - 0x37;
- } elseif ($wc >= 0x61 && $wc <= 0x66) {
- $v += $wc - 0x57;
- } else {
- $ov = sprintf('invalid hex in unicode escape' .
- ' sequence (%d) at wchar #%u', $i, $p);
- return false;
- }
- return true;
+/* skip all characters that are JSON whitespace */
+function minijson_skip_wsp($s, &$Sp, $Sx) {
+ while ($Sp < $Sx)
+ switch (ord($s[$Sp])) {
+ default:
+ return;
+ case 0x09:
+ case 0x0A:
+ case 0x0D:
+ case 0x20:
+ ++$Sp;
+ }
}
-function minijson_decode_array(&$j, &$p, &$ov, $depth) {
+function minijson_decode_array($s, &$Sp, $Sx, &$ov, $depth) {
$ov = array();
- $first = true;
- /* I wish there were a goto in PHP… */
- while (true) {
- /* skip optional whitespace between tokens */
- minijson_skip_wsp($j, $p);
+ /* skip optional whitespace between tokens */
+ minijson_skip_wsp($s, $Sp, $Sx);
- /* end of the array? */
- if ($j[$p] == 0x5D) {
- /* regular exit point for the loop */
+ /* check for end of array or first member */
+ if ($Sp >= $Sx) {
+ minijson_decode_array_eos:
+ $ov = 'unexpected EOS in Array';
+ return false;
+ }
+ switch ($s[$Sp]) {
+ case ',':
+ $ov = 'unexpected leading comma in Array';
+ return false;
+ case ']':
+ ++$Sp;
+ return true;
+ }
- $p++;
- return true;
- }
+ goto minijson_decode_array_member;
- /* member separator? */
- if ($j[$p] == 0x2C) {
- $p++;
- if ($first) {
- /* no comma before the first member */
- $ov = 'unexpected comma at wchar #' . $p;
- return false;
- }
- } elseif (!$first) {
- /*
- * all but the first member require a separating
- * comma; this also catches e.g. trailing
- * rubbish after numbers
- */
- $ov = 'expected comma at wchar #' . $p;
- return false;
- }
- $first = false;
+ minijson_decode_array_loop:
+ /* skip optional whitespace between tokens */
+ minijson_skip_wsp($s, $Sp, $Sx);
+
+ /* check for end of array or next member */
+ if ($Sp >= $Sx)
+ goto minijson_decode_array_eos;
+ switch ($s[$Sp++]) {
+ case ']':
+ return true;
+ case ',':
+ break;
+ default:
+ --$Sp;
+ $ov = 'missing comma in Array';
+ return false;
+ }
- /* parse the member value */
- $v = NULL;
- if (!minijson_decode_value($j, $p, $v, $depth)) {
- /* pass through error code */
- $ov = $v;
- return false;
- }
- $ov[] = $v;
+ minijson_decode_array_member:
+ /* parse the member value */
+ $v = NULL;
+ if (!minijson_decode_value($s, $Sp, $Sx, $v, $depth)) {
+ /* pass through error code */
+ $ov = $v;
+ return false;
}
+ /* consume, rinse, repeat */
+ $ov[] = $v;
+ goto minijson_decode_array_loop;
}
-function minijson_decode_object(&$j, &$p, &$ov, $depth) {
+function minijson_decode_object($s, &$Sp, $Sx, &$ov, $depth) {
$ov = array();
- $first = true;
-
- while (true) {
- /* skip optional whitespace between tokens */
- minijson_skip_wsp($j, $p);
-
- /* end of the object? */
- if ($j[$p] == 0x7D) {
- /* regular exit point for the loop */
-
- $p++;
- return true;
- }
+ /* skip optional whitespace between tokens */
+ minijson_skip_wsp($s, $Sp, $Sx);
- /* member separator? */
- if ($j[$p] == 0x2C) {
- $p++;
- if ($first) {
- /* no comma before the first member */
- $ov = 'unexpected comma at wchar #' . $p;
- return false;
- }
- } elseif (!$first) {
- /*
- * all but the first member require a separating
- * comma; this also catches e.g. trailing
- * rubbish after numbers
- */
- $ov = 'expected comma at wchar #' . $p;
- return false;
- }
- $first = false;
+ /* check for end of object or first member */
+ if ($Sp >= $Sx) {
+ minijson_decode_object_eos:
+ $ov = 'unexpected EOS in Object';
+ return false;
+ }
+ switch ($s[$Sp]) {
+ case ',':
+ $ov = 'unexpected leading comma in Object';
+ return false;
+ case '}':
+ ++$Sp;
+ return true;
+ }
- /* skip optional whitespace between tokens */
- minijson_skip_wsp($j, $p);
+ goto minijson_decode_object_member;
- /* parse the member key */
- if ($j[$p++] != 0x22) {
- $ov = 'expected key string at wchar #' . $p;
- return false;
- }
- $k = null;
- if (!minijson_decode_string($j, $p, $k)) {
- /* pass through error code */
- $ov = $k;
- return false;
- }
+ minijson_decode_object_loop:
+ /* skip optional whitespace between tokens */
+ minijson_skip_wsp($s, $Sp, $Sx);
+
+ /* check for end of object or next member */
+ if ($Sp >= $Sx)
+ goto minijson_decode_object_eos;
+ switch ($s[$Sp++]) {
+ case '}':
+ return true;
+ case ',':
+ break;
+ default:
+ --$Sp;
+ $ov = 'missing comma in Object';
+ return false;
+ }
- /* skip optional whitespace between tokens */
- minijson_skip_wsp($j, $p);
+ minijson_decode_object_member:
+ /* skip optional whitespace between tokens */
+ minijson_skip_wsp($s, $Sp, $Sx);
+
+ /* look for the member key */
+ if ($Sp >= $Sx)
+ goto minijson_decode_object_eos;
+ if ($s[$Sp++] !== '"') {
+ --$Sp;
+ $ov = 'expected key string for Object member';
+ return false;
+ }
+ if (($k = minijson_decode_string($s, $Sp, $Sx)) !== true) {
+ ob_end_clean();
+ /* pass through error code */
+ $ov = $k;
+ return false;
+ }
+ $k = ob_get_clean();
- /* key-value separator? */
- if ($j[$p++] != 0x3A) {
- $ov = 'expected colon at wchar #' . $p;
- return false;
- }
+ /* skip optional whitespace between tokens */
+ minijson_skip_wsp($s, $Sp, $Sx);
+
+ /* check for separator between key and value */
+ if ($Sp >= $Sx)
+ goto minijson_decode_object_eos;
+ if ($s[$Sp++] !== ':') {
+ --$Sp;
+ $ov = 'expected colon in Object member';
+ return false;
+ }
- /* parse the member value */
- $v = NULL;
- if (!minijson_decode_value($j, $p, $v, $depth)) {
- /* pass through error code */
- $ov = $v;
- return false;
- }
- $ov[$k] = $v;
+ /* parse the member value */
+ $v = NULL;
+ if (!minijson_decode_value($s, $Sp, $Sx, $v, $depth)) {
+ /* pass through error code */
+ $ov = $v;
+ return false;
}
+ /* consume, rinse, repeat */
+ $ov[$k] = $v;
+ goto minijson_decode_object_loop;
}
-function minijson_decode_value(&$j, &$p, &$ov, $depth) {
+function minijson_decode_value($s, &$Sp, $Sx, &$ov, $depth) {
/* skip optional whitespace between tokens */
- minijson_skip_wsp($j, $p);
+ minijson_skip_wsp($s, $Sp, $Sx);
/* parse begin of Value token */
- $wc = $j[$p++];
+ if ($Sp >= $Sx) {
+ $ov = 'unexpected EOS, Value expected';
+ return false;
+ }
+ $c = $s[$Sp++];
/* style: falling through exits with false */
- if ($wc == 0) {
- $ov = 'unexpected EOS at wchar #' . $p;
- } elseif ($wc == 0x6E) {
+ if ($c === 'n') {
/* literal null? */
- if ($j[$p++] == 0x75 &&
- $j[$p++] == 0x6C &&
- $j[$p++] == 0x6C) {
+ if (substr($s, $Sp, 3) === 'ull') {
+ $Sp += 3;
$ov = NULL;
return true;
}
- $ov = 'expected ull after n near wchar #' . $p;
- } elseif ($wc == 0x74) {
+ --$Sp;
+ $ov = 'expected “ull” after “n”';
+ } elseif ($c === 't') {
/* literal true? */
- if ($j[$p++] == 0x72 &&
- $j[$p++] == 0x75 &&
- $j[$p++] == 0x65) {
+ if (substr($s, $Sp, 3) === 'rue') {
+ $Sp += 3;
$ov = true;
return true;
}
- $ov = 'expected rue after t near wchar #' . $p;
- } elseif ($wc == 0x66) {
+ --$Sp;
+ $ov = 'expected “rue” after “t”';
+ } elseif ($c === 'f') {
/* literal false? */
- if ($j[$p++] == 0x61 &&
- $j[$p++] == 0x6C &&
- $j[$p++] == 0x73 &&
- $j[$p++] == 0x65) {
+ if (substr($s, $Sp, 4) === 'alse') {
+ $Sp += 4;
$ov = false;
return true;
}
- $ov = 'expected alse after f near wchar #' . $p;
- } elseif ($wc == 0x5B) {
- if (--$depth > 0) {
- return minijson_decode_array($j, $p, $ov, $depth);
- }
- $ov = 'recursion limit exceeded at wchar #' . $p;
- } elseif ($wc == 0x7B) {
- if (--$depth > 0) {
- return minijson_decode_object($j, $p, $ov, $depth);
+ --$Sp;
+ $ov = 'expected “alse” after “f”';
+ } elseif ($c === '[') {
+ if (--$depth > 0)
+ return minijson_decode_array($s, $Sp, $Sx, $ov, $depth);
+ --$Sp;
+ $ov = 'recursion limit exceeded by Array';
+ } elseif ($c === '{') {
+ if (--$depth > 0)
+ return minijson_decode_object($s, $Sp, $Sx, $ov, $depth);
+ --$Sp;
+ $ov = 'recursion limit exceeded by Object';
+ } elseif ($c === '"') {
+ if (($ov = minijson_decode_string($s, $Sp, $Sx)) !== true) {
+ ob_end_clean();
+ return false;
}
- $ov = 'recursion limit exceeded at wchar #' . $p;
- } elseif ($wc == 0x22) {
- return minijson_decode_string($j, $p, $ov);
- } elseif ($wc == 0x2D || ($wc >= 0x30 && $wc <= 0x39)) {
- $p--;
- return minijson_decode_number($j, $p, $ov);
+ $ov = ob_get_clean();
+ return true;
+ } elseif ($c === '-' || (ord($c) >= 0x30 && ord($c) <= 0x39)) {
+ --$Sp;
+ return minijson_decode_number($s, $Sp, $Sx, $ov);
+ } elseif (ord($c) >= 0x20 && ord($c) <= 0x7E) {
+ --$Sp;
+ $ov = "unexpected “{$c}”, Value expected";
} else {
- $ov = sprintf('unexpected U+%04X at wchar #%u', $wc, $p);
+ --$Sp;
+ $ov = sprintf('unexpected 0x%02X, Value expected', ord($c));
}
return false;
}
-function minijson_decode_string(&$j, &$p, &$ov) {
- /* UTF-16LE string buffer */
- $s = '';
-
- while (true) {
- $wc = $j[$p++];
- if ($wc < 0x20) {
- $ov = 'unescaped control character $wc at wchar #' . $p;
- return false;
- } elseif ($wc == 0x22) {
- /* regular exit point for the loop */
-
- /* convert to UTF-8, then re-check against UTF-16 */
- $ov = mb_convert_encoding($s, 'UTF-8', 'UTF-16LE');
- $tmp = mb_convert_encoding($ov, 'UTF-16LE', 'UTF-8');
- if ($tmp != $s) {
- $ov = 'no Unicode string before wchar #' . $p;
- return false;
+function minijson_decode_string($s, &$Sp, $Sx) {
+ ob_start();
+ minijson_decode_string_loop:
+ if ($Sp >= $Sx)
+ return 'unexpected EOS in String';
+ /* get next octet; switch on what to do with it */
+ if (($ch = $s[$Sp++]) === '"') {
+ /* regular exit point for the loop */
+ return true;
+ }
+ /* backslash escape? */
+ if ($ch === "\\") {
+ if ($Sp >= $Sx)
+ return 'unexpected EOS after backslash in String';
+ $ch = $s[$Sp++];
+ if ($ch === '"' || $ch === '/' || $ch === "\\")
+ echo $ch;
+ elseif ($ch === 't')
+ echo "\x09";
+ elseif ($ch === 'n')
+ echo "\x0A";
+ elseif ($ch === 'r')
+ echo "\x0D";
+ elseif ($ch === 'b')
+ echo "\x08";
+ elseif ($ch === 'f')
+ echo "\x0C";
+ elseif ($ch !== 'u') {
+ $Sp -= 2;
+ return "invalid escape '\\$ch' in String";
+ } else {
+ $surrogate = 0;
+ minijson_decode_string_unicode_escape:
+ $wc = 0;
+ if ($Sp + 4 > $Sx) {
+ $Sp -= 2;
+ return 'unexpected EOS in Unicode escape sequence';
}
- return true;
- } elseif ($wc == 0x5C) {
- $wc = $j[$p++];
- if ($wc == 0x22 ||
- $wc == 0x2F ||
- $wc == 0x5C) {
- $s .= chr($wc) . chr(0);
- } elseif ($wc == 0x62) {
- $s .= chr(0x08) . chr(0);
- } elseif ($wc == 0x66) {
- $s .= chr(0x0C) . chr(0);
- } elseif ($wc == 0x6E) {
- $s .= chr(0x0A) . chr(0);
- } elseif ($wc == 0x72) {
- $s .= chr(0x0D) . chr(0);
- } elseif ($wc == 0x74) {
- $s .= chr(0x09) . chr(0);
- } elseif ($wc == 0x75) {
- $v = 0;
- for ($tmp = 1; $tmp <= 4; $tmp++) {
- $v <<= 4;
- if (!minijson_get_hexdigit($j, $p,
- $v, $tmp)) {
- /* pass through error code */
- return false;
- }
+ for ($tmp = 1; $tmp <= 4; $tmp++) {
+ $wc <<= 4;
+ switch (ord($s[$Sp++])) {
+ case 0x30: break;
+ case 0x31: $wc += 1; break;
+ case 0x32: $wc += 2; break;
+ case 0x33: $wc += 3; break;
+ case 0x34: $wc += 4; break;
+ case 0x35: $wc += 5; break;
+ case 0x36: $wc += 6; break;
+ case 0x37: $wc += 7; break;
+ case 0x38: $wc += 8; break;
+ case 0x39: $wc += 9; break;
+ case 0x41: case 0x61: $wc += 10; break;
+ case 0x42: case 0x62: $wc += 11; break;
+ case 0x43: case 0x63: $wc += 12; break;
+ case 0x44: case 0x64: $wc += 13; break;
+ case 0x45: case 0x65: $wc += 14; break;
+ case 0x46: case 0x66: $wc += 15; break;
+ default:
+ --$Sp;
+ return "invalid hex digit #$tmp/4 in Unicode escape sequence";
}
- if ($v < 1 || $v > 0xFFFD) {
- $ov = 'non-Unicode escape $v before wchar #' . $p;
- return false;
+ }
+ if ($surrogate) {
+ if ($wc < 0xDC00 || $wc > 0xDFFF) {
+ $Sp -= 6;
+ return sprintf('expected low surrogate, not %04X, after high surrogate %04X', $wc, $surrogate);
}
- $s .= chr($v & 0xFF) . chr($v >> 8);
- } else {
- $ov = 'invalid escape sequence at wchar #' . $p;
- return false;
+ $wc = 0x10000 + (($surrogate & 0x03FF) << 10) + ($wc & 0x03FF);
+ } elseif ($wc >= 0xD800 && $wc <= 0xDBFF) {
+ $surrogate = $wc;
+ /* UTF-16 expects the low surrogate */
+ if (substr($s, $Sp, 2) !== '\u')
+ return 'expected Unicode escape after high surrogate';
+ $Sp += 2;
+ goto minijson_decode_string_unicode_escape;
+ } elseif ($wc >= 0xDC00 && $wc <= 0xDFFF) {
+ $Sp -= 6;
+ return sprintf('loose low surrogate %04X', $wc);
+ } elseif ($wc < 1 || $wc > 0xFFFD) {
+ $Sp -= 6;
+ return sprintf('non-Unicode escape %04X', $wc);
}
- } elseif ($wc > 0xD7FF && $wc < 0xE000) {
- $ov = 'surrogate $wc at wchar #' . $p;
- return false;
- } elseif ($wc > 0xFFFD) {
- $ov = 'non-Unicode char $wc at wchar #' . $p;
- return false;
- } else {
- $s .= chr($wc & 0xFF) . chr($wc >> 8);
+ if ($wc < 0x80) {
+ echo chr($wc);
+ goto minijson_decode_string_loop;
+ }
+ minijson_decode_string_unicode_char:
+ if ($wc < 0x0800)
+ echo chr(0xC0 | ($wc >> 6)) .
+ chr(0x80 | ($wc & 0x3F));
+ elseif ($wc <= 0xFFFF)
+ echo chr(0xE0 | ($wc >> 12)) .
+ chr(0x80 | (($wc >> 6) & 0x3F)) .
+ chr(0x80 | ($wc & 0x3F));
+ else
+ echo chr(0xF0 | ($wc >> 18)) .
+ chr(0x80 | (($wc >> 12) & 0x3F)) .
+ chr(0x80 | (($wc >> 6) & 0x3F)) .
+ chr(0x80 | ($wc & 0x3F));
}
+ goto minijson_decode_string_loop;
}
-}
-
-function minijson_decode_number(&$j, &$p, &$ov) {
- $s = '';
- $isint = true;
-
- /* check for an optional minus sign */
- $wc = $j[$p++];
- if ($wc == 0x2D) {
- $s = '-';
- $wc = $j[$p++];
+ if (($c = ord($ch)) < 0x20) {
+ --$Sp;
+ return sprintf('unescaped control character 0x%02X in String', $c);
}
-
- if ($wc == 0x30) {
- /* begins with zero (0 or 0.x) */
- $s .= '0';
- $wc = $j[$p++];
- if ($wc >= 0x30 && $wc <= 0x39) {
- $ov = 'no leading zeroes please at wchar #' . $p;
- return false;
- }
- } elseif ($wc >= 0x31 && $wc <= 0x39) {
- /* begins with 1‥9 */
- while ($wc >= 0x30 && $wc <= 0x39) {
- $s .= chr($wc);
- $wc = $j[$p++];
- }
+ if ($c < 0x80) {
+ echo $ch;
+ goto minijson_decode_string_loop;
+ }
+ /* decode UTF-8 */
+ if ($c < 0xC2 || $c >= 0xF0) {
+ --$Sp;
+ return sprintf('invalid UTF-8 lead octet 0x%02X in String', $c);
+ }
+ if ($c < 0xE0) {
+ $wc = ($c & 0x1F) << 6;
+ $wmin = 0x80; /* redundant */
+ $Ss = 1;
} else {
- $ov = 'decimal digit expected at wchar #' . $p;
- if ($s[0] != '-') {
- /* we had none, so it’s allowed to prepend one */
- $ov = 'minus sign or ' . $ov;
- }
- return false;
+ $wc = ($c & 0x0F) << 12;
+ $wmin = 0x800;
+ $Ss = 2;
}
-
- /* do we have a fractional part? */
- if ($wc == 0x2E) {
- $s .= '.';
- $isint = false;
- $wc = $j[$p++];
- if ($wc < 0x30 || $wc > 0x39) {
- $ov = 'fractional digit expected at wchar #' . $p;
- return false;
- }
- while ($wc >= 0x30 && $wc <= 0x39) {
- $s .= chr($wc);
- $wc = $j[$p++];
- }
+ if ($Sp + $Ss > $Sx) {
+ --$Sp;
+ return 'unexpected EOS after UTF-8 lead byte in String';
}
-
- /* do we have an exponent, treat number as mantissa? */
- if ($wc == 0x45 || $wc == 0x65) {
- $s .= 'E';
- $isint = false;
- $wc = $j[$p++];
- if ($wc == 0x2B || $wc == 0x2D) {
- $s .= chr($wc);
- $wc = $j[$p++];
- }
- if ($wc < 0x30 || $wc > 0x39) {
- $ov = 'exponent digit expected at wchar #' . $p;
- return false;
- }
- while ($wc >= 0x30 && $wc <= 0x39) {
- $s .= chr($wc);
- $wc = $j[$p++];
- }
+ while ($Ss--)
+ if (($c = ord($s[$Sp++]) ^ 0x80) <= 0x3F)
+ $wc |= $c << (6 * $Ss);
+ else {
+ --$Sp;
+ return sprintf('invalid UTF-8 trail octet 0x%02X in String', $c ^ 0x80);
+ }
+ if ($wc < $wmin) {
+ $Sp -= 3; /* only for E0‥EF-led sequence */
+ return sprintf('non-minimalistic encoding for Unicode char %04X in String', $wc);
}
- $p--;
- if ($isint) {
- /* no fractional part, no exponent */
+ if ($wc >= 0xD800 && $wc <= 0xDFFF) {
+ $Sp -= 3;
+ return sprintf('unescaped surrogate %04X in String', $wc);
+ }
+ if ($wc <= 0xFFFD)
+ goto minijson_decode_string_unicode_char;
+ $Sp -= 3;
+ return sprintf('non-Unicode char %04X in String', $wc);
+}
- $v = (int)$s;
- if (strval($v) == $s) {
- $ov = $v;
+function minijson_decode_number($s, &$Sp, $Sx, &$ov) {
+ $matches = array('');
+ if (!preg_match('/-?(?:0|[1-9][0-9]*)(?:\.[0-9]+)?(?:[Ee][+-]?[0-9]+)?/A',
+ $s, $matches, 0, $Sp) || strlen($matches[0]) < 1) {
+ $ov = 'expected Number';
+ return false;
+ }
+ $Sp += strlen($matches[0]);
+ if (strpos($matches[0], '.') === false) {
+ /* possible integer */
+ $ov = (int)$matches[0];
+ if (strval($ov) === $matches[0])
return true;
- }
}
- $ov = (float)$s;
+ $ov = (float)$matches[0];
return true;
}
@@ -790,7 +896,7 @@ if (defined('__main__') && constant('__main__') === __FILE__) {
)) . "\n");
exit(1);
}
- fwrite(STDOUT, minijson_encode_internal($odat, $indent, $depth,
+ fwrite(STDOUT, minijson_encode($odat, $indent, $depth,
$truncsz, $rsrc) . "\n");
exit(0);
}
hooks/post-receive
--
Main git repository for Evolvis project useful-scripts
(Evolvis project useful-scripts repository useful-scripts)
More information about the useful-scripts-commits
mailing list