1: <?php
2: /**
3: * JSON (JavaScript Object Notation) is a lightweight data-interchange
4: * format. It is easy for humans to read and write. It is easy for machines
5: * to parse and generate. It is based on a subset of the JavaScript
6: * Programming Language, Standard ECMA-262 3rd Edition - December 1999.
7: * This feature can also be found in Python. JSON is a text format that is
8: * completely language independent but uses conventions that are familiar
9: * to programmers of the C-family of languages, including C, C++, C#, Java,
10: * JavaScript, Perl, TCL, and many others. These properties make JSON an
11: * ideal data-interchange language.
12: *
13: * This package provides a simple encoder and decoder for JSON notation. It
14: * is intended for use with client-side Javascript applications that make
15: * use of HTTPRequest to perform server communication functions - data can
16: * be encoded into JSON notation for use in a client-side javascript, or
17: * decoded from incoming Javascript requests. JSON format is native to
18: * Javascript, and can be directly eval()'ed with no further parsing
19: * overhead
20: *
21: * All strings should be in ASCII or UTF-8 format!
22: *
23: * LICENSE: Redistribution and use in source and binary forms, with or
24: * without modification, are permitted provided that the following
25: * conditions are met: Redistributions of source code must retain the
26: * above copyright notice, this list of conditions and the following
27: * disclaimer. Redistributions in binary form must reproduce the above
28: * copyright notice, this list of conditions and the following disclaimer
29: * in the documentation and/or other materials provided with the
30: * distribution.
31: *
32: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
33: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
34: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
35: * NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
36: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
37: * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
38: * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
39: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
40: * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
41: * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
42: * DAMAGE.
43: *
44: * @author Michal Migurski <mike-json@teczno.com>
45: * @author Matt Knapp <mdknapp[at]gmail[dot]com>
46: * @author Brett Stimmerman <brettstimmerman[at]gmail[dot]com>
47: * @copyright 2005 Michal Migurski
48: * @license http://www.opensource.org/licenses/bsd-license.php
49: * @link http://pear.php.net/pepr/pepr-proposal-show.php?id=198
50: */
51:
/**
 * CJSON converts PHP data to and from JSON format.
 *
 * Both directions prefer the native json extension (json_encode() /
 * json_decode()) when it is available. A pure-PHP implementation is used
 * for string encoding when json_encode() does not exist, and for decoding
 * whenever json_decode() is missing or returns null. The pure-PHP decoder
 * is more lenient than the native one: it accepts single-quoted strings,
 * unquoted object keys and comments.
 *
 * @author Michal Migurski <mike-json@teczno.com>
 * @author Matt Knapp <mdknapp[at]gmail[dot]com>
 * @author Brett Stimmerman <brettstimmerman[at]gmail[dot]com>
 * @package system.web.helpers
 * @since 1.0
 */
class CJSON
{
    /**
     * Marker constant for CJSON::decode(), used to flag stack state:
     * the parser is inside a top-level element ("slice") of the container.
     */
    const JSON_SLICE = 1;

    /**
     * Marker constant for CJSON::decode(), used to flag stack state:
     * the parser is inside a quoted string.
     */
    const JSON_IN_STR = 2;

    /**
     * Marker constant for CJSON::decode(), used to flag stack state:
     * the parser is inside a nested array.
     */
    const JSON_IN_ARR = 4;

    /**
     * Marker constant for CJSON::decode(), used to flag stack state:
     * the parser is inside a nested object.
     */
    const JSON_IN_OBJ = 8;

    /**
     * Marker constant for CJSON::decode(), used to flag stack state:
     * the parser is inside a block comment.
     */
    const JSON_IN_CMT = 16;

    /**
     * Encodes an arbitrary variable into JSON format.
     *
     * @param mixed $var any number, boolean, string, array, or object to be encoded.
     * If var is a string, it will be converted to UTF-8 format first before being encoded.
     * @return string JSON representation of the input. Note that for integer input the
     * integer itself is returned (not cast to string), and an empty string is returned
     * for types that cannot be encoded (e.g. resources).
     */
    public static function encode($var)
    {
        switch (gettype($var)) {
            case 'boolean':
                return $var ? 'true' : 'false';

            case 'NULL':
                return 'null';

            case 'integer':
                return (int) $var;

            case 'double':
            case 'float':
                // locale-independent representation
                return str_replace(',', '.', (string) (float) $var);

            case 'string':
                return self::encodeString($var);

            case 'array':
                /*
                 * Any array whose keys are not exactly 0..n-1 is emitted as a
                 * JSON object. This also covers sparse numeric arrays, which
                 * would otherwise be re-indexed (losing the keys) or expanded
                 * to max_index entries by some JS engines.
                 */
                if (count($var) && (array_keys($var) !== range(0, count($var) - 1))) {
                    return self::encodeObject($var);
                }

                // treat it like a regular array
                return '[' . join(',', array_map(array('CJSON', 'encode'), $var)) . ']';

            case 'object':
                // Check for the JsonSerializable interface available in PHP 5.4.
                // The method is called directly instead of going through json_encode(),
                // so the interface can be user-defined on older PHP versions.
                if (interface_exists('JsonSerializable', false) && $var instanceof JsonSerializable) {
                    return self::encode($var->jsonSerialize());
                }

                if ($var instanceof Traversable) {
                    $vars = array();
                    foreach ($var as $k => $v) {
                        $vars[$k] = $v;
                    }
                } else {
                    $vars = get_object_vars($var);
                }

                return self::encodeObject($vars);

            default:
                return '';
        }
    }

    /**
     * Encodes a key/value map as a JSON object literal.
     *
     * @param array $vars map of property name to value
     * @return string JSON object, like '{"name":value,...}'
     */
    private static function encodeObject($vars)
    {
        $pairs = array_map(
            array('CJSON', 'nameValue'),
            array_keys($vars),
            array_values($vars)
        );

        return '{' . join(',', $pairs) . '}';
    }

    /**
     * Encodes a PHP string as a JSON string literal.
     *
     * The string is first converted to UTF-8 when the application charset
     * differs, then handed to json_encode() if that function exists;
     * otherwise the pure-PHP encoder is used.
     *
     * @param string $var string to encode
     * @return string JSON string literal (the return value of json_encode() when it is used)
     */
    private static function encodeString($var)
    {
        $enc = self::sourceCharset();
        if ($enc !== 'UTF-8') {
            $var = iconv($enc, 'UTF-8', $var);
        }

        if (function_exists('json_encode')) {
            return json_encode($var);
        }

        return self::encodeStringFallback($var);
    }

    /**
     * Returns the upper-cased charset of the input strings.
     *
     * The charset is read from the Yii application. When the Yii class is
     * not loaded or no application instance exists, UTF-8 is assumed instead
     * of dereferencing a missing application.
     *
     * @return string upper-cased charset name
     */
    private static function sourceCharset()
    {
        if (class_exists('Yii', false)) {
            $app = Yii::app();
            if ($app !== null) {
                return strtoupper($app->charset);
            }
        }

        return 'UTF-8';
    }

    /**
     * Pure-PHP JSON string encoder, used when json_encode() is unavailable.
     *
     * STRINGS ARE EXPECTED TO BE IN ASCII OR UTF-8 FORMAT. Every non-ASCII
     * character is written as one \uXXXX escape per UTF-16 code unit.
     *
     * @param string $var UTF-8 (or ASCII) string
     * @return string JSON string literal including the surrounding double quotes
     */
    private static function encodeStringFallback($var)
    {
        $ascii = '';
        $length = strlen($var);

        for ($c = 0; $c < $length; ++$c) {
            $char = $var[$c];
            $ord = ord($char);

            if ($ord === 0x08) {
                $ascii .= '\b';
            } elseif ($ord === 0x09) {
                $ascii .= '\t';
            } elseif ($ord === 0x0A) {
                $ascii .= '\n';
            } elseif ($ord === 0x0C) {
                $ascii .= '\f';
            } elseif ($ord === 0x0D) {
                $ascii .= '\r';
            } elseif ($ord === 0x22 || $ord === 0x2F || $ord === 0x5C) {
                // double quote, slash, backslash
                $ascii .= '\\' . $char;
            } elseif ($ord < 0x20) {
                // remaining control characters have no short escape and must
                // not appear raw inside a JSON string
                $ascii .= sprintf('\u%04x', $ord);
            } elseif ($ord <= 0x7F) {
                // characters U-00000020 - U-0000007F (same as ASCII)
                $ascii .= $char;
            } else {
                // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
                if (($ord & 0xE0) === 0xC0) {
                    $size = 2; // U-00000080 - U-000007FF, mask 110XXXXX
                } elseif (($ord & 0xF0) === 0xE0) {
                    $size = 3; // U-00000800 - U-0000FFFF, mask 1110XXXX
                } elseif (($ord & 0xF8) === 0xF0) {
                    $size = 4; // U-00010000 - U-001FFFFF, mask 11110XXX
                } elseif (($ord & 0xFC) === 0xF8) {
                    $size = 5; // U-00200000 - U-03FFFFFF, mask 111110XX
                } elseif (($ord & 0xFE) === 0xFC) {
                    $size = 6; // U-04000000 - U-7FFFFFFF, mask 1111110X
                } else {
                    // not a lead byte (stray continuation byte, 0xFE, 0xFF): skipped
                    continue;
                }

                $sequence = substr($var, $c, $size);
                $c += $size - 1;

                $hex = bin2hex(self::utf8ToUTF16BE($sequence));
                if ($hex === '') {
                    $ascii .= '\ufffd';
                    continue;
                }

                // one escape per 16-bit unit, so surrogate pairs become
                // two \uXXXX escapes instead of a single 8-digit one
                foreach (str_split($hex, 4) as $unit) {
                    $ascii .= sprintf('\u%04s', $unit);
                }
            }
        }

        return '"' . $ascii . '"';
    }

    /**
     * array-walking function for use in generating JSON-formatted name-value pairs
     *
     * @param string $name name of key to use
     * @param mixed $value value to be encoded
     *
     * @return string JSON-formatted name-value pair, like '"name":value'
     */
    protected static function nameValue($name, $value)
    {
        return self::encode(strval($name)) . ':' . self::encode($value);
    }

    /**
     * reduce a string by removing leading and trailing comments and whitespace
     *
     * @param string $str string value to strip of comments and whitespace
     *
     * @return string string value stripped of comments and whitespace
     */
    protected static function reduceString($str)
    {
        $patterns = array(
            // eliminate single line comments in '// ...' form
            '#^\s*//(.+)$#m',

            // eliminate multi-line comments in '/* ... */' form, at start of string
            '#^\s*/\*(.+)\*/#Us',

            // eliminate multi-line comments in '/* ... */' form, at end of string
            '#/\*(.+)\*/\s*$#Us',
        );

        // eliminate extraneous space
        return trim((string) preg_replace($patterns, '', $str));
    }

    /**
     * decodes a JSON string into appropriate variable
     *
     * The native json_decode() is tried first. When it is missing or returns
     * null, the input is parsed by the lenient pure-PHP decoder.
     *
     * @param string $str JSON-formatted string
     * @param boolean $useArray whether to use associative array to represent object data
     * @return mixed number, boolean, string, array, or object corresponding to given JSON input string;
     * null when the input cannot be interpreted.
     * Note that decode() always returns strings in ASCII or UTF-8 format!
     */
    public static function decode($str, $useArray = true)
    {
        if (function_exists('json_decode')) {
            $json = json_decode($str, $useArray);

            // based on investigation, native fails sometimes returning null.
            // see: http://gggeek.altervista.org/sw/article_20070425.html
            // As of PHP 5.3.6 it still fails on some valid JSON strings
            if ($json !== null) {
                return $json;
            }
        }

        $str = self::reduceString($str);
        $lower = strtolower($str);

        if ($lower === 'true') {
            return true;
        }
        if ($lower === 'false') {
            return false;
        }
        if ($lower === 'null') {
            return null;
        }

        if (is_numeric($str)) {
            // Return float or int, as appropriate
            return ((float) $str == (int) $str)
                ? (int) $str
                : (float) $str;
        }

        // '.*' (not '.+') so that an empty quoted string is recognised too
        if (preg_match('/^("|\').*(\1)$/s', $str, $m) && $m[1] == $m[2]) {
            return self::decodeString($str);
        }

        if (preg_match('/^\[.*\]$/s', $str) || preg_match('/^\{.*\}$/s', $str)) {
            return self::decodeContainer($str, $useArray);
        }

        return null;
    }

    /**
     * Decodes a quoted JSON string literal (single or double quoted).
     *
     * @param string $str the literal including its surrounding quotes
     * @return string decoded string in UTF-8 format
     */
    private static function decodeString($str)
    {
        // two-character escapes that map to a single control character
        $simple = array(
            '\b' => "\x08",
            '\t' => "\x09",
            '\n' => "\x0A",
            '\f' => "\x0C",
            '\r' => "\x0D",
        );

        $delim = substr($str, 0, 1);
        $chrs = (string) substr($str, 1, -1);
        $utf8 = '';
        $length = strlen($chrs);

        for ($c = 0; $c < $length; ++$c) {
            $pair = substr($chrs, $c, 2);
            $ord = ord($chrs[$c]);

            if (isset($simple[$pair])) {
                $utf8 .= $simple[$pair];
                ++$c;
            } elseif ($pair === '\\"' || $pair === '\\\'' || $pair === '\\\\' || $pair === '\\/') {
                // an escaped quote of the other kind keeps its backslash
                // dropped and the quote is picked up on the next iteration
                if (($delim == '"' && $pair != '\\\'') ||
                    ($delim == "'" && $pair != '\\"')) {
                    $utf8 .= $chrs[++$c];
                }
            } elseif (preg_match('/\\\u[0-9A-F]{4}/i', substr($chrs, $c, 6))) {
                // escaped unicode character
                $unit = hexdec(substr($chrs, $c + 2, 4));
                $utf16 = pack('n', $unit);
                $c += 5;

                // a high surrogate directly followed by an escaped low
                // surrogate encodes one character outside the BMP
                if ($unit >= 0xD800 && $unit <= 0xDBFF
                    && preg_match('/^\\\\u(D[C-F][0-9A-F]{2})$/i', substr($chrs, $c + 1, 6), $low)) {
                    $utf16 .= pack('n', hexdec($low[1]));
                    $c += 6;
                }

                $utf8 .= self::utf16beToUTF8($utf16);
            } elseif ($ord >= 0x20 && $ord <= 0x7F) {
                $utf8 .= $chrs[$c];
            } else {
                // raw multi-byte UTF-8 sequences are copied through unchanged
                // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
                if (($ord & 0xE0) == 0xC0) {
                    $size = 2; // U-00000080 - U-000007FF, mask 110XXXXX
                } elseif (($ord & 0xF0) == 0xE0) {
                    $size = 3; // U-00000800 - U-0000FFFF, mask 1110XXXX
                } elseif (($ord & 0xF8) == 0xF0) {
                    $size = 4; // U-00010000 - U-001FFFFF, mask 11110XXX
                } elseif (($ord & 0xFC) == 0xF8) {
                    $size = 5; // U-00200000 - U-03FFFFFF, mask 111110XX
                } elseif (($ord & 0xFE) == 0xFC) {
                    $size = 6; // U-04000000 - U-7FFFFFFF, mask 1111110X
                } else {
                    // anything else (control characters, stray bytes) is skipped
                    continue;
                }

                $utf8 .= substr($chrs, $c, $size);
                $c += $size - 1;
            }
        }

        return $utf8;
    }

    /**
     * Decodes array ('[...]') or object ('{...}') notation.
     *
     * The content between the outer brackets is scanned once with a small
     * state stack; every top-level comma (or the end of input) closes a
     * slice, which is decoded recursively through decode().
     *
     * @param string $str the container literal including its outer brackets
     * @param boolean $useArray whether objects are returned as associative arrays
     * @return mixed array for '[...]'; array or stdClass for '{...}'
     */
    private static function decodeContainer($str, $useArray)
    {
        $isList = ($str[0] == '[');
        if ($isList || $useArray) {
            $result = array();
        } else {
            $result = new stdClass();
        }

        $stk = array(
            array('what' => self::JSON_SLICE, 'where' => 0, 'delim' => false),
        );

        $chrs = self::reduceString(substr($str, 1, -1));
        if ($chrs === '') {
            return $result;
        }

        // states in which a nested array, object or comment may start
        $open = array(self::JSON_SLICE, self::JSON_IN_ARR, self::JSON_IN_OBJ);
        $length = strlen($chrs);

        for ($c = 0; $c <= $length; ++$c) {
            $top = end($stk);
            $what = $top['what'];
            $pair = substr($chrs, $c, 2);
            $char = ($c < $length) ? $chrs[$c] : '';

            if ($c == $length || ($char === ',' && $what == self::JSON_SLICE)) {
                // found a comma that is not inside a string, array, etc.,
                // OR we've reached the end of the character list
                $slice = substr($chrs, $top['where'], $c - $top['where']);
                $stk[] = array('what' => self::JSON_SLICE, 'where' => $c + 1, 'delim' => false);

                if ($isList) {
                    $result[] = self::decode($slice, $useArray);
                    continue;
                }

                if (preg_match('/^\s*(["\'].*[^\\\]["\'])\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
                    // "name":value pair
                    $key = self::decode($parts[1], $useArray);
                } elseif (preg_match('/^\s*(\w+)\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
                    // name:value pair, where name is unquoted
                    $key = $parts[1];
                } else {
                    // not a recognisable pair: ignored
                    continue;
                }

                $val = self::decode($parts[2], $useArray);
                if ($useArray) {
                    $result[$key] = $val;
                } else {
                    $result->$key = $val;
                }
            } elseif (($char === '"' || $char === "'")
                && $what != self::JSON_IN_STR && $what != self::JSON_IN_CMT) {
                // found a quote, and we are neither inside a string nor a
                // comment (a quote inside a comment must not open a string)
                $stk[] = array('what' => self::JSON_IN_STR, 'where' => $c, 'delim' => $char);
            } elseif ($what == self::JSON_IN_STR && $char === $top['delim']) {
                // the quote closes the string only when it is preceded by an
                // even number of backslashes (each pair is an escaped backslash)
                $backslashes = 0;
                for ($i = $c - 1; $i > $top['where'] && $chrs[$i] === '\\'; --$i) {
                    ++$backslashes;
                }
                if ($backslashes % 2 === 0) {
                    array_pop($stk);
                }
            } elseif ($char === '[' && in_array($what, $open, true)) {
                // found a left-bracket, and we are in an array, object, or slice
                $stk[] = array('what' => self::JSON_IN_ARR, 'where' => $c, 'delim' => false);
            } elseif ($char === ']' && $what == self::JSON_IN_ARR) {
                // found a right-bracket, and we're in an array
                array_pop($stk);
            } elseif ($char === '{' && in_array($what, $open, true)) {
                // found a left-brace, and we are in an array, object, or slice
                $stk[] = array('what' => self::JSON_IN_OBJ, 'where' => $c, 'delim' => false);
            } elseif ($char === '}' && $what == self::JSON_IN_OBJ) {
                // found a right-brace, and we're in an object
                array_pop($stk);
            } elseif ($pair === '/*' && in_array($what, $open, true)) {
                // found a comment start, and we are in an array, object, or slice
                $stk[] = array('what' => self::JSON_IN_CMT, 'where' => $c, 'delim' => false);
                $c++;
            } elseif ($pair === '*/' && $what == self::JSON_IN_CMT) {
                // found a comment end, and we're in one now: blank the whole
                // comment so it does not end up inside the decoded slice
                array_pop($stk);
                $c++;

                $span = $c - $top['where'] + 1;
                $chrs = substr_replace($chrs, str_repeat(' ', $span), $top['where'], $span);
            }
        }

        return $result;
    }

    /**
     * This function returns any UTF-8 encoded text as a list of
     * Unicode code points. Sequences of up to four bytes are understood.
     *
     * @param string $str UTF-8 string to convert
     * @return array list of integer code points
     * @author Scott Michael Reynen <scott@randomchaos.com>
     * @link http://www.randomchaos.com/document.php?source=php_and_unicode
     * @see unicodeToUTF8()
     */
    protected static function utf8ToUnicode(&$str)
    {
        $unicode = array();
        $values = array();
        $lookingFor = 1;
        $length = strlen($str);

        for ($i = 0; $i < $length; $i++) {
            $thisValue = ord($str[$i]);

            if ($thisValue < 128) {
                $unicode[] = $thisValue;
                continue;
            }

            if (count($values) == 0) {
                // the lead byte determines the length of the sequence
                if ($thisValue < 224) {
                    $lookingFor = 2;
                } elseif ($thisValue < 240) {
                    $lookingFor = 3;
                } else {
                    $lookingFor = 4;
                }
            }

            $values[] = $thisValue;
            if (count($values) != $lookingFor) {
                continue;
            }

            if ($lookingFor == 4) {
                $number = (($values[0] % 8) * 262144) + (($values[1] % 64) * 4096)
                    + (($values[2] % 64) * 64) + ($values[3] % 64);
            } elseif ($lookingFor == 3) {
                $number = (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64);
            } else {
                $number = (($values[0] % 32) * 64) + ($values[1] % 64);
            }

            $unicode[] = $number;
            $values = array();
            $lookingFor = 1;
        }

        return $unicode;
    }

    /**
     * This function converts a list of Unicode code points back to its
     * UTF-8 representation.
     *
     * @param array $str list of integer code points to convert
     * @return string UTF-8 encoded string
     * @author Scott Michael Reynen <scott@randomchaos.com>
     * @link http://www.randomchaos.com/document.php?source=php_and_unicode
     * @see utf8ToUnicode()
     */
    protected static function unicodeToUTF8(&$str)
    {
        $utf8 = '';

        foreach ($str as $unicode) {
            if ($unicode < 128) {
                $utf8 .= chr($unicode);
            } elseif ($unicode < 2048) {
                $utf8 .= chr(0xC0 | ($unicode >> 6));
                $utf8 .= chr(0x80 | ($unicode & 0x3F));
            } elseif ($unicode < 65536) {
                $utf8 .= chr(0xE0 | ($unicode >> 12));
                $utf8 .= chr(0x80 | (($unicode >> 6) & 0x3F));
                $utf8 .= chr(0x80 | ($unicode & 0x3F));
            } else {
                // code points outside the BMP need four bytes
                $utf8 .= chr(0xF0 | (($unicode >> 18) & 0x07));
                $utf8 .= chr(0x80 | (($unicode >> 12) & 0x3F));
                $utf8 .= chr(0x80 | (($unicode >> 6) & 0x3F));
                $utf8 .= chr(0x80 | ($unicode & 0x3F));
            }
        }

        return $utf8;
    }

    /**
     * UTF-8 to UTF-16BE conversion.
     *
     * mb_convert_encoding() is used when available; otherwise the string is
     * converted through utf8ToUnicode(), writing code points above U+FFFF
     * as surrogate pairs.
     *
     * @param string $str string to convert
     * @param boolean $bom whether to output BOM header
     * @return string
     */
    protected static function utf8ToUTF16BE(&$str, $bom = false)
    {
        $out = $bom ? "\xFE\xFF" : '';

        if (function_exists('mb_convert_encoding')) {
            return $out . mb_convert_encoding($str, 'UTF-16BE', 'UTF-8');
        }

        foreach (self::utf8ToUnicode($str) as $cp) {
            if ($cp > 0xFFFF) {
                $cp -= 0x10000;
                $out .= pack('nn', 0xD800 | ($cp >> 10), 0xDC00 | ($cp & 0x3FF));
            } else {
                $out .= pack('n', $cp);
            }
        }

        return $out;
    }

    /**
     * UTF-16BE to UTF-8 conversion.
     *
     * A high surrogate immediately followed by a low surrogate is combined
     * into a single code point before conversion; unpaired surrogates are
     * passed to unicodeToUTF8() as they are.
     *
     * @param string $str UTF-16BE string to convert
     * @return string
     */
    protected static function utf16beToUTF8(&$str)
    {
        $units = unpack('n*', $str);
        $units = is_array($units) ? array_values($units) : array();
        $count = count($units);
        $uni = array();

        for ($i = 0; $i < $count; ++$i) {
            $unit = $units[$i];

            if ($unit >= 0xD800 && $unit <= 0xDBFF && $i + 1 < $count
                && $units[$i + 1] >= 0xDC00 && $units[$i + 1] <= 0xDFFF) {
                $unit = 0x10000 + (($unit - 0xD800) << 10) + ($units[$i + 1] - 0xDC00);
                ++$i;
            }

            $uni[] = $unit;
        }

        return self::unicodeToUTF8($uni);
    }
}
718: