[evolvis-commits] r17193: now get the string encoding thing right, and minijson encoder is finished

mirabilos at evolvis.org mirabilos at evolvis.org
Wed Apr 13 14:33:02 CEST 2011


Author: mirabilos
Date: 2011-04-13 14:33:02 +0200 (Wed, 13 Apr 2011)
New Revision: 17193

Modified:
   trunk/gforge_base/evolvisforge/gforge/common/include/minijson.php
   trunk/gforge_base/evolvisforge/gforge/debian/changelog
Log:
now get the string encoding thing right, and minijson encoder is finished

shockingly, JSON strings aren’t binary-safe (they must be UTF-8),
and PHP is broken (unsurprisingly) again…


Modified: trunk/gforge_base/evolvisforge/gforge/common/include/minijson.php
===================================================================
--- trunk/gforge_base/evolvisforge/gforge/common/include/minijson.php	2011-04-13 12:33:00 UTC (rev 17192)
+++ trunk/gforge_base/evolvisforge/gforge/common/include/minijson.php	2011-04-13 12:33:02 UTC (rev 17193)
@@ -65,8 +65,33 @@
 	}
 	if (is_string($x)) {
 		$rs = "\"";
-		foreach (str_split($x) as $v) {
-			$y = ord($v);
+		/*
+		 * A bit unbelievable: not only has mb_check_encoding
+		 * not existed from the start, it also does not check
+		 * reliably – so converting back and forth is the way
+		 * they recommend… also, JSON is not binary-safe either…
+		 */
+		$isunicode = false;
+		$mb_encoding = false;
+		if (function_exists('mb_internal_encoding') &&
+		    function_exists('mb_convert_encoding')) {
+			$mb_encoding = mb_internal_encoding();
+			mb_internal_encoding("UTF-8");
+			$z = mb_convert_encoding($x, "UTF-16LE", "UTF-8");
+			$y = mb_convert_encoding($z, "UTF-8", "UTF-16LE");
+			$isunicode = ($y == $x);
+		}
+		if ($isunicode) {
+			$z = str_split($z, 2);
+		} else {
+			$z = str_split($x);
+		}
+
+		foreach ($z as $v) {
+			$y = ord($v[0]);
+			if ($isunicode) {
+				$y |= ord($v[1]) << 8;
+			}
 			if ($y == 8) {
 				$rs .= "\\b";
 			} else if ($y == 9) {
@@ -77,16 +102,24 @@
 				$rs .= "\\f";
 			} else if ($y == 13) {
 				$rs .= "\\r";
-			} else if ($y < 0x20 || ($y > 0x7E && $y < 0xA0)) {
+			} else if ($y == 34) {
+				$rs .= "\\\"";
+			} else if ($y == 92) {
+				$rs .= "\\\\";
+			} else if ($y < 0x20 || $y > 0xFFFD ||
+			    ($y >= 0xD800 && $y <= 0xDFFF) ||
+			    ($y > 0x7E && (!$isunicode || $y < 0xA0))) {
 				$rs .= sprintf("\\u%04X", $y);
-			} else if ($y > 0xFFFD) {
-				/* XXX encode as UTF-16 */
-				$rs .= "\\uFFFD";
-			} else if ($v == "\"" || $v == "\\") {
-				$rs .= "\\".$v;
-			} else
-				$rs .= $v;
+			} else if ($isunicode && $y > 0x7E) {
+				$rs .= mb_convert_encoding($v, "UTF-8",
+				    "UTF-16LE");
+			} else {
+				$rs .= $v[0];
+			}
 		}
+		if ($mb_encoding !== false) {
+			mb_internal_encoding($mb_encoding);
+		}
 		return $rs."\"";
 	}
 	if (is_array($x)) {

Modified: trunk/gforge_base/evolvisforge/gforge/debian/changelog
===================================================================
--- trunk/gforge_base/evolvisforge/gforge/debian/changelog	2011-04-13 12:33:00 UTC (rev 17192)
+++ trunk/gforge_base/evolvisforge/gforge/debian/changelog	2011-04-13 12:33:02 UTC (rev 17193)
@@ -8,8 +8,9 @@
   * Improve debugging of database errors
   * New function ffDebug(type,intro,<pre>text)
   * [#944] In Power Queries, multiple submitters may be selected
+  * The minijson encoder is now complete
 
- -- Thorsten Glaser <t.glaser at tarent.de>  Wed, 13 Apr 2011 11:33:07 +0200
+ -- Thorsten Glaser <t.glaser at tarent.de>  Wed, 13 Apr 2011 13:36:04 +0200
 
 gforge (4.8.3+evolvis33) unstable; urgency=high
 



More information about the evolvis-commits mailing list