00001 <?php
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135 class t3lib_cs {
/**
 * Byte value written in place of a character that cannot be converted
 * (63 is the ASCII code of "?").
 */
var $noCharByteVal = 63;

/**
 * Parsed conversion tables, keyed by charset name. Each entry holds a
 * 'local' map (charset code => UTF-8 string) and a 'utf8' map
 * (UTF-8 string => charset code).
 */
var $parsedCharsets = array();

/**
 * Case folding tables, keyed by charset name, with 'toUpper', 'toLower'
 * and 'toTitle' sub-maps.
 */
var $caseFolding = array();

/**
 * Transliteration-to-ASCII tables, keyed by charset name.
 */
var $toASCII = array();

/**
 * Charsets whose characters are always read as two bytes.
 */
var $twoByteSets = array(
    'ucs-2' => 1,
);

/**
 * Charsets flagged as four-byte sets.
 * NOTE(review): presumably four bytes per character; no visible code reads this list.
 */
var $fourByteSets = array(
    'ucs-4'  => 1,
    'utf-32' => 1,
);

/**
 * Charsets in which a byte above 127 starts a two-byte sequence.
 * shift_jis is special-cased by utf8_encode(): bytes 0xA0-0xDF stay single-byte.
 */
var $eucBasedSets = array(
    'gb2312'    => 1,
    'big5'      => 1,
    'euc-kr'    => 1,
    'shift_jis' => 1,
);
00165
00166
00167
/**
 * Alternative charset names mapped to the canonical name used by this class.
 * parse_charset() looks names up here after lower-casing and trimming them.
 * Every key is listed exactly once (a duplicate 'utf8' entry was removed).
 */
var $synonyms = array(
    'us' => 'ascii',
    'us-ascii' => 'ascii',
    'cp819' => 'iso-8859-1',
    'ibm819' => 'iso-8859-1',
    'iso-ir-100' => 'iso-8859-1',
    'iso-ir-101' => 'iso-8859-2',
    'iso-ir-109' => 'iso-8859-3',
    'iso-ir-110' => 'iso-8859-4',
    'iso-ir-144' => 'iso-8859-5',
    'iso-ir-127' => 'iso-8859-6',
    'iso-ir-126' => 'iso-8859-7',
    'iso-ir-138' => 'iso-8859-8',
    'iso-ir-148' => 'iso-8859-9',
    'iso-ir-157' => 'iso-8859-10',
    'iso-ir-179' => 'iso-8859-13',
    'iso-ir-199' => 'iso-8859-14',
    'iso-ir-203' => 'iso-8859-15',
    'csisolatin1' => 'iso-8859-1',
    'csisolatin2' => 'iso-8859-2',
    'csisolatin3' => 'iso-8859-3',
    'csisolatin5' => 'iso-8859-9',
    'csisolatin8' => 'iso-8859-14',
    'csisolatin9' => 'iso-8859-15',
    'csisolatingreek' => 'iso-8859-7',
    'iso-celtic' => 'iso-8859-14',
    'latin1' => 'iso-8859-1',
    'latin2' => 'iso-8859-2',
    'latin3' => 'iso-8859-3',
    'latin5' => 'iso-8859-9',
    'latin6' => 'iso-8859-10',
    'latin8' => 'iso-8859-14',
    'latin9' => 'iso-8859-15',
    'l1' => 'iso-8859-1',
    'l2' => 'iso-8859-2',
    'l3' => 'iso-8859-3',
    'l5' => 'iso-8859-9',
    'l6' => 'iso-8859-10',
    'l8' => 'iso-8859-14',
    'l9' => 'iso-8859-15',
    'cyrillic' => 'iso-8859-5',
    'arabic' => 'iso-8859-6',
    'tis-620' => 'iso-8859-11',
    'win874' => 'windows-874',
    'win1250' => 'windows-1250',
    'win1251' => 'windows-1251',
    'win1252' => 'windows-1252',
    'win1253' => 'windows-1253',
    'win1254' => 'windows-1254',
    'win1255' => 'windows-1255',
    'win1256' => 'windows-1256',
    'win1257' => 'windows-1257',
    'win1258' => 'windows-1258',
    'cp1250' => 'windows-1250',
    'cp1251' => 'windows-1251',
    'cp1252' => 'windows-1252',
    'ms-ee' => 'windows-1250',
    'ms-ansi' => 'windows-1252',
    'ms-greek' => 'windows-1253',
    'ms-turk' => 'windows-1254',
    'winbaltrim' => 'windows-1257',
    'koi-8ru' => 'koi-8r',
    'koi8r' => 'koi-8r',
    'cp878' => 'koi-8r',
    'mac' => 'macroman',
    'macintosh' => 'macroman',
    'euc-cn' => 'gb2312',
    'x-euc-cn' => 'gb2312',
    'euccn' => 'gb2312',
    'cp936' => 'gb2312',
    'big-5' => 'big5',
    'cp950' => 'big5',
    'eucjp' => 'euc-jp',
    'sjis' => 'shift_jis',
    'shift-jis' => 'shift_jis',
    'cp932' => 'shift_jis',
    'cp949' => 'euc-kr',
    'utf7' => 'utf-7',
    'utf8' => 'utf-8',
    'utf16' => 'utf-16',
    'utf32' => 'utf-32',
    'ucs2' => 'ucs-2',
    'ucs4' => 'ucs-4',
);
00253
00254
/**
 * Language identifier => script family. get_locale_charset() uses the
 * language part of a locale as key and feeds the resulting script name into
 * the script_to_charset_* tables, so every value here must be a key of
 * those tables to have an effect.
 */
var $lang_to_script = array(
    // Two-letter language codes
    'ar' => 'arabic',
    'bg' => 'cyrillic',
    'bs' => 'east_european',
    'cs' => 'east_european',
    'da' => 'west_european',
    'de' => 'west_european',
    'es' => 'west_european',
    'et' => 'estonian',
    'eo' => 'unicode',
    'eu' => 'west_european',
    'fa' => 'arabic',
    'fi' => 'west_european',
    'fo' => 'west_european',
    'fr' => 'west_european',
    'ga' => 'west_european',
    'ge' => 'unicode',
    'gr' => 'greek',
    'he' => 'hebrew',
    'hi' => 'unicode',
    'hr' => 'east_european',
    'hu' => 'east_european',
    'iw' => 'hebrew',
    'is' => 'west_european',
    'it' => 'west_european',
    'ja' => 'japanese',
    'kl' => 'west_european',
    'ko' => 'korean',
    'lt' => 'lithuanian',
    'lv' => 'west_european',
    'nl' => 'west_european',
    'no' => 'west_european',
    'nb' => 'west_european',
    'nn' => 'west_european',
    'pl' => 'east_european',
    'pt' => 'west_european',
    'ro' => 'east_european',
    'ru' => 'cyrillic',
    'sk' => 'east_european',
    'sl' => 'east_european',
    'sr' => 'cyrillic',
    'sv' => 'west_european',
    'sq' => 'albanian',
    'th' => 'thai',
    'uk' => 'cyrillic',
    'vi' => 'vietnamese',
    'zh' => 'chinese',

    // Three-letter language codes
    // NOTE(review): these look like MS Windows language abbreviations - confirm.
    'ara' => 'arabic',
    'bgr' => 'cyrillic',
    'cat' => 'west_european',
    'chs' => 'simpl_chinese',
    'cht' => 'trad_chinese',
    'csy' => 'east_european',
    'dan' => 'west_european',
    'deu' => 'west_european',
    'dea' => 'west_european',
    'des' => 'west_european',
    'ena' => 'west_european',
    'enc' => 'west_european',
    'eng' => 'west_european',
    'enz' => 'west_european',
    'enu' => 'west_european',
    'euq' => 'west_european',
    'fos' => 'west_european',
    'far' => 'arabic',
    'fin' => 'west_european',
    'fra' => 'west_european',
    'frb' => 'west_european',
    'frc' => 'west_european',
    'frs' => 'west_european',
    'geo' => 'unicode',
    'glg' => 'west_european',
    'ell' => 'greek',
    'heb' => 'hebrew',
    'hin' => 'unicode',
    'hun' => 'east_european',
    // Was misspelled 'west_euorpean', which is not a key of the script tables.
    'isl' => 'west_european',
    'ita' => 'west_european',
    'its' => 'west_european',
    'jpn' => 'japanese',
    'kor' => 'korean',
    'lth' => 'lithuanian',
    'lvi' => 'west_european',
    'msl' => 'west_european',
    'nlb' => 'west_european',
    'nld' => 'west_european',
    'nor' => 'west_european',
    'non' => 'west_european',
    'plk' => 'east_european',
    'ptg' => 'west_european',
    'ptb' => 'west_european',
    'rom' => 'east_european',
    'rus' => 'cyrillic',
    'slv' => 'east_european',
    'sky' => 'east_european',
    'srl' => 'east_european',
    'srb' => 'cyrillic',
    'esp' => 'west_european',
    'esm' => 'west_european',
    'esn' => 'west_european',
    'sve' => 'west_european',
    'sqi' => 'albanian',
    'tha' => 'thai',
    'trk' => 'turkish',
    'ukr' => 'cyrillic',

    // English language names
    'albanian' => 'albanian',
    'arabic' => 'arabic',
    'basque' => 'west_european',
    'bosnian' => 'east_european',
    // Aligned with 'bg' and 'bgr' above, which both map to 'cyrillic'.
    'bulgarian' => 'cyrillic',
    'catalan' => 'west_european',
    'croatian' => 'east_european',
    'czech' => 'east_european',
    'danish' => 'west_european',
    'dutch' => 'west_european',
    'english' => 'west_european',
    'esperanto' => 'unicode',
    'estonian' => 'estonian',
    'faroese' => 'west_european',
    'farsi' => 'arabic',
    'finnish' => 'west_european',
    'french' => 'west_european',
    'galician' => 'west_european',
    'georgian' => 'unicode',
    'german' => 'west_european',
    'greek' => 'greek',
    'greenlandic' => 'west_european',
    'hebrew' => 'hebrew',
    'hindi' => 'unicode',
    'hungarian' => 'east_european',
    'icelandic' => 'west_european',
    'italian' => 'west_european',
    'latvian' => 'west_european',
    'lettish' => 'west_european',
    'lithuanian' => 'lithuanian',
    'malay' => 'west_european',
    'norwegian' => 'west_european',
    'persian' => 'arabic',
    'polish' => 'east_european',
    'portuguese' => 'west_european',
    'russian' => 'cyrillic',
    'romanian' => 'east_european',
    'serbian' => 'cyrillic',
    'slovak' => 'east_european',
    'slovenian' => 'east_european',
    'spanish' => 'west_european',
    // 'svedish' and 'that' are historical misspellings; they are kept so that
    // existing lookups keep working, next to the correctly spelled keys.
    'svedish' => 'west_european',
    'swedish' => 'west_european',
    'that' => 'thai',
    'thai' => 'thai',
    'turkish' => 'turkish',
    'ukrainian' => 'cyrillic',
);
00410
00411
/**
 * Script family => charset, consulted by get_locale_charset() when TYPO3_OS
 * is not 'WIN'. An empty value makes the caller fall back to 'iso-8859-1'.
 */
var $script_to_charset_unix = array(
    'west_european' => 'iso-8859-1',
    'estonian'      => 'iso-8859-1',
    'east_european' => 'iso-8859-2',
    'baltic'        => 'iso-8859-4',
    'cyrillic'      => 'iso-8859-5',
    'arabic'        => 'iso-8859-6',
    'greek'         => 'iso-8859-7',
    'hebrew'        => 'iso-8859-8',
    'turkish'       => 'iso-8859-9',
    'thai'          => 'iso-8859-11',
    'lithuanian'    => 'iso-8859-13',
    'chinese'       => 'gb2312',
    'japanese'      => 'euc-jp',
    'korean'        => 'euc-kr',
    'simpl_chinese' => 'gb2312',
    'trad_chinese'  => 'big5',
    'vietnamese'    => '',
    'unicode'       => 'utf-8',
    'albanian'      => 'utf-8'
);
00433
00434
/**
 * Script family => charset, consulted by get_locale_charset() when TYPO3_OS
 * is 'WIN'. Scripts missing here make the caller fall back to 'windows-1252'.
 */
var $script_to_charset_windows = array(
    'east_european' => 'windows-1250',
    'cyrillic'      => 'windows-1251',
    'west_european' => 'windows-1252',
    'greek'         => 'windows-1253',
    'turkish'       => 'windows-1254',
    'hebrew'        => 'windows-1255',
    'arabic'        => 'windows-1256',
    'baltic'        => 'windows-1257',
    'estonian'      => 'windows-1257',
    'lithuanian'    => 'windows-1257',
    'vietnamese'    => 'windows-1258',
    'thai'          => 'cp874',
    'korean'        => 'cp949',
    'chinese'       => 'gb2312',
    'japanese'      => 'shift_jis',
    'simpl_chinese' => 'gb2312',
    'trad_chinese'  => 'big5',
    'albanian'      => 'windows-1250',
    'unicode'       => 'utf-8'
);
00456
00457
/**
 * Complete locale strings that map directly to a charset.
 * get_locale_charset() lower-cases the locale before looking it up here, so
 * keys must be lower case to be reachable. The mixed-case 'sr@Latn' key could
 * never match; it is kept for code reading the array directly and a
 * lower-case twin is added for the lookup.
 */
var $locale_to_charset = array(
    'japanese.euc' => 'euc-jp',
    'ja_jp.ujis' => 'euc-jp',
    'korean.euc' => 'euc-kr',
    'sr@Latn' => 'iso-8859-2',
    'sr@latn' => 'iso-8859-2',
    'zh_cn' => 'gb2312',
    'zh_hk' => 'big5',
    'zh_tw' => 'big5',
);
00467
00468
00469
/**
 * Two-letter key => charset name.
 * NOTE(review): the keys look like TYPO3 backend language keys and an empty
 * value presumably means "use the default charset" - confirm against callers.
 */
var $charSetArray = array(
    'dk' => '',
    'de' => '',
    'no' => '',
    'it' => '',
    'fr' => '',
    'es' => '',
    'nl' => '',
    'cz' => 'windows-1250',
    'pl' => 'iso-8859-2',
    'si' => 'windows-1250',
    'fi' => '',
    'tr' => 'iso-8859-9',
    'se' => '',
    'pt' => '',
    'ru' => 'windows-1251',
    'ro' => 'iso-8859-2',
    'ch' => 'gb2312',
    'sk' => 'windows-1250',
    'lt' => 'windows-1257',
    'is' => 'utf-8',
    'hr' => 'windows-1250',
    'hu' => 'iso-8859-2',
    'gl' => '',
    'th' => 'iso-8859-11',
    'gr' => 'iso-8859-7',
    'hk' => 'big5',
    'eu' => '',
    'bg' => 'windows-1251',
    'br' => '',
    'et' => 'iso-8859-4',
    'ar' => 'iso-8859-6',
    'he' => 'utf-8',
    'ua' => 'windows-1251',
    'jp' => 'shift_jis',
    'lv' => 'utf-8',
    'vn' => 'utf-8',
    'ca' => 'iso-8859-15',
    'ba' => 'iso-8859-2',
    'kr' => 'euc-kr',
    'eo' => 'utf-8',
    'my' => '',
    'hi' => 'utf-8',
    'fo' => 'utf-8',
    'fa' => 'utf-8',
    'sr' => 'utf-8',
    'sq' => 'utf-8',
    'ge' => 'utf-8',
    'ga' => '',
);
00520
00521
00522
/**
 * Two-letter key => language / locale code.
 * NOTE(review): presumably maps TYPO3 language keys that differ from the ISO
 * code to that ISO code - confirm against callers.
 */
var $isoArray = array(
    'ba' => 'bs',
    'br' => 'pt_BR',
    'ch' => 'zh_CN',
    'cz' => 'cs',
    'dk' => 'da',
    'si' => 'sl',
    'se' => 'sv',
    'gl' => 'kl',
    'gr' => 'el',
    'hk' => 'zh_HK',
    'kr' => 'ko',
    'ua' => 'uk',
    'jp' => 'ja',
    'vn' => 'vi',
);
00539
00540
00541
00542
00543
00544
00545
00546
/**
 * Normalizes a charset name: lower-cases it, trims surrounding whitespace
 * and replaces it with its canonical name when it is a known synonym.
 *
 * @param string $charset Charset name as given by the caller
 * @return string Normalized charset name
 */
function parse_charset($charset) {
    $normalized = strtolower($charset);
    $normalized = trim($normalized);

    return isset($this->synonyms[$normalized])
        ? $this->synonyms[$normalized]
        : $normalized;
}
00553
00554
00555
00556
00557
00558
00559
00560
00561
00562
00563
00564
00565
/**
 * Determines the charset belonging to a locale string of the form
 * "language[_country][.charset][@modifier]".
 *
 * Resolution order:
 *  1. exact match of the lower-cased locale in $this->locale_to_charset
 *  2. an explicit ".charset" part (normalized through parse_charset())
 *  3. the "@euro" modifier, which yields iso-8859-15
 *  4. the script of the language part, mapped through the Windows or the
 *     Unix script table depending on TYPO3_OS
 *  5. 'windows-1252' (Windows) or 'iso-8859-1' (otherwise) as last resort
 *
 * Missing locale parts no longer raise undefined offset/variable warnings.
 *
 * @param string $locale Locale string
 * @return string Charset name
 */
function get_locale_charset($locale) {
    $locale = strtolower($locale);

    // Exact locale to charset mapping has precedence
    if (isset($this->locale_to_charset[$locale])) {
        return $this->locale_to_charset[$locale];
    }

    // Split off the modifier ("@euro" etc.)
    $parts = explode('@', $locale);
    $locale = $parts[0];
    $modifier = isset($parts[1]) ? $parts[1] : '';

    // An explicit charset in the locale wins
    $parts = explode('.', $locale);
    $locale = $parts[0];
    $charset = isset($parts[1]) ? $parts[1] : '';
    if ($charset) {
        return $this->parse_charset($charset);
    }

    if ($modifier == 'euro') {
        return 'iso-8859-15';
    }

    // Only the language part is needed; a country part is ignored
    $parts = explode('_', $locale);
    $language = $parts[0];
    $script = isset($this->lang_to_script[$language]) ? $this->lang_to_script[$language] : '';

    // Unknown scripts and empty table values fall back to the platform default
    if (TYPO3_OS == 'WIN') {
        $cs = !empty($this->script_to_charset_windows[$script])
            ? $this->script_to_charset_windows[$script]
            : 'windows-1252';
    } else {
        $cs = !empty($this->script_to_charset_unix[$script])
            ? $this->script_to_charset_unix[$script]
            : 'iso-8859-1';
    }

    return $cs;
}
00594
00595
00596
00597
00598
00599
00600
00601
00602
00603
00604
00605
00606
00607
00608
00609
00610
00611
00612
00613
00614
00615
00616
00617
00618
/**
 * Converts a string from one charset to another.
 *
 * When the target is utf-8, or when no entity fallback is requested, the
 * conversion is first attempted with the PHP extension selected in
 * $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod'] ('mbstring',
 * 'iconv' or 'recode'). If no extension is configured or it reports failure,
 * the string is converted through UTF-8 with this class' own tables.
 *
 * @param string $str Input string
 * @param string $fromCS Source charset (lower case, canonical name)
 * @param string $toCS Target charset (lower case, canonical name)
 * @param boolean $useEntityForNoChar If set, characters missing in the target charset become numeric entities instead of the "no char" byte
 * @return string Converted string
 */
function conv($str,$fromCS,$toCS,$useEntityForNoChar=0) {
    if ($fromCS == $toCS) {
        return $str;
    }

    // The PHP extensions cannot produce entities for unmappable characters,
    // so they are only used when that is not asked for (or cannot happen).
    if ($toCS == 'utf-8' || !$useEntityForNoChar) {
        $convMethod = isset($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod'])
            ? $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod']
            : '';

        switch ($convMethod) {
            case 'mbstring':
                $conv_str = mb_convert_encoding($str, $toCS, $fromCS);
                if (false !== $conv_str) return $conv_str;
                break;

            case 'iconv':
                // "//TRANSLIT" asks iconv to approximate characters that have
                // no representation in the target charset.
                $conv_str = iconv($fromCS, $toCS.'//TRANSLIT', $str);
                if (false !== $conv_str) return $conv_str;
                break;

            case 'recode':
                $conv_str = recode_string($fromCS.'..'.$toCS, $str);
                if (false !== $conv_str) return $conv_str;
                break;
        }
        // Fall through to the table based conversion
    }

    if ($fromCS != 'utf-8') $str = $this->utf8_encode($str, $fromCS);
    if ($toCS != 'utf-8') $str = $this->utf8_decode($str, $toCS, $useEntityForNoChar);

    return $str;
}
00647
00648
00649
00650
00651
00652
00653
00654
00655
00656
00657
00658
/**
 * Converts all string values of an array, recursively, from one charset to
 * another. The array is modified in place; non-string scalars are left alone.
 *
 * @param array $array Array to convert (passed by reference)
 * @param string $fromCS Source charset
 * @param string $toCS Target charset
 * @param boolean $useEntityForNoChar Passed on to conv()
 * @return void
 */
function convArray(&$array,$fromCS,$toCS,$useEntityForNoChar=0) {
    foreach ($array as $index => $unused) {
        if (is_array($array[$index])) {
            // Descend into nested arrays
            $this->convArray($array[$index], $fromCS, $toCS, $useEntityForNoChar);
            continue;
        }

        if (is_string($array[$index])) {
            $array[$index] = $this->conv($array[$index], $fromCS, $toCS, $useEntityForNoChar);
        }
    }
}
00668
00669
00670
00671
00672
00673
00674
00675
/**
 * Converts a string from the given charset to UTF-8 using the parsed
 * conversion table of that charset.
 *
 * Bytes below 128 are copied unchanged (except for two-byte charsets, where
 * every character is read as two bytes). Characters without an entry in the
 * table are replaced by the "no char" byte.
 *
 * String offsets use square brackets (the curly-brace form is a parse error
 * since PHP 8.0) and a missing second byte at the end of the input is read
 * as 0 instead of triggering an out-of-range offset warning.
 *
 * @param string $str Input string
 * @param string $charset Source charset (lower case, canonical name)
 * @return string|null UTF-8 string; nothing (NULL) is returned when the charset table cannot be initialized
 */
function utf8_encode($str,$charset) {
    // Nothing to do
    if ($charset === 'utf-8') return $str;

    if ($this->initCharset($charset)) {
        $strLen = strlen($str);
        $outStr = '';

        for ($a = 0; $a < $strLen; $a++) {
            $chr = substr($str, $a, 1);
            $ord = ord($chr);

            if (isset($this->twoByteSets[$charset])) {
                // Every character consists of two bytes, high byte first
                $ord2 = ($a + 1 < $strLen) ? ord($str[$a + 1]) : 0;
                $ord = $ord << 8 | $ord2;

                if (isset($this->parsedCharsets[$charset]['local'][$ord])) {
                    $outStr .= $this->parsedCharsets[$charset]['local'][$ord];
                } else {
                    $outStr .= chr($this->noCharByteVal);
                }
                $a++;
            } elseif ($ord > 127) {
                if (isset($this->eucBasedSets[$charset])) {
                    // A high byte starts a two-byte character, except for the
                    // single-byte range 0xA0-0xDF of shift_jis.
                    if ($charset != 'shift_jis' || ($ord < 0xA0 || $ord > 0xDF)) {
                        $a++;
                        $ord2 = ($a < $strLen) ? ord($str[$a]) : 0;
                        $ord = $ord * 256 + $ord2;
                    }
                }

                if (isset($this->parsedCharsets[$charset]['local'][$ord])) {
                    $outStr .= $this->parsedCharsets[$charset]['local'][$ord];
                } else {
                    $outStr .= chr($this->noCharByteVal);
                }
            } else {
                // Plain ASCII
                $outStr .= $chr;
            }
        }

        return $outStr;
    }
}
00713
00714
00715
00716
00717
00718
00719
00720
00721
/**
 * Converts a UTF-8 string to the given charset using the parsed conversion
 * table of that charset.
 *
 * ASCII bytes are copied. For each multi-byte sequence the table is
 * consulted; a sequence without mapping becomes a hexadecimal numeric entity
 * when $useEntityForNoChar is set, otherwise the "no char" byte. A stray
 * continuation byte also becomes the "no char" byte.
 *
 * @param string $str UTF-8 input string
 * @param string $charset Target charset (lower case, canonical name)
 * @param boolean $useEntityForNoChar Emit "&#x...;" for unmappable characters
 * @return string|null Converted string; nothing (NULL) is returned when the charset table cannot be initialized
 */
function utf8_decode($str,$charset,$useEntityForNoChar=0) {
    if ($charset === 'utf-8') {
        return $str;
    }

    if (!$this->initCharset($charset)) {
        return;
    }

    $length = strlen($str);
    $result = '';
    $pos = 0;

    while ($pos < $length) {
        $byte = substr($str, $pos, 1);
        $code = ord($byte);
        $pos++;

        // ASCII is passed through
        if ($code <= 127) {
            $result .= $byte;
            continue;
        }

        // 10xxxxxx cannot start a character
        if (!($code & 64)) {
            $result .= chr($this->noCharByteVal);
            continue;
        }

        // Each further leading 1-bit of the first byte announces one more byte
        $sequence = $byte;
        for ($shift = 0; $shift < 8; $shift++) {
            $code = $code << 1;
            if (!($code & 128)) {
                break;
            }
            $sequence .= substr($str, $pos, 1);
            $pos++;
        }

        if (isset($this->parsedCharsets[$charset]['utf8'][$sequence])) {
            $localCode = $this->parsedCharsets[$charset]['utf8'][$sequence];
            if ($localCode > 255) {
                // Two-byte character in the target charset, high byte first
                $result .= chr(($localCode >> 8) & 255).chr($localCode & 255);
            } else {
                $result .= chr($localCode);
            }
        } elseif ($useEntityForNoChar) {
            $result .= '&#'.$this->utf8CharToUnumber($sequence, 1).';';
        } else {
            $result .= chr($this->noCharByteVal);
        }
    }

    return $result;
}
00762
00763
00764
00765
00766
00767
00768
/**
 * Replaces every multi-byte UTF-8 character of a string by its hexadecimal
 * numeric entity ("&#x...;"). ASCII is left untouched; a stray continuation
 * byte is replaced by the "no char" byte.
 *
 * @param string $str UTF-8 input string
 * @return string String in which all non-ASCII characters are entities
 */
function utf8_to_entities($str) {
    $length = strlen($str);
    $result = '';
    $pos = 0;

    while ($pos < $length) {
        $byte = substr($str, $pos, 1);
        $code = ord($byte);
        $pos++;

        if ($code <= 127) {
            $result .= $byte;
            continue;
        }

        // 10xxxxxx cannot start a character
        if (!($code & 64)) {
            $result .= chr($this->noCharByteVal);
            continue;
        }

        // Collect the continuation bytes announced by the first byte
        $sequence = $byte;
        for ($shift = 0; $shift < 8; $shift++) {
            $code = $code << 1;
            if (!($code & 128)) {
                break;
            }
            $sequence .= substr($str, $pos, 1);
            $pos++;
        }

        $result .= '&#'.$this->utf8CharToUnumber($sequence, 1).';';
    }

    return $result;
}
00794
00795
00796
00797
00798
00799
00800
00801
/**
 * Converts numeric entities (decimal "&#233;" and hexadecimal "&#xE9;") and,
 * optionally, named HTML entities ("&eacute;") to UTF-8 characters.
 *
 * The named-entity table is requested explicitly in UTF-8. Without an
 * explicit encoding PHP 5.4+ already returns UTF-8 values, which the former
 * re-encoding from iso-8859-1 turned into double-encoded text.
 * Anything that is not a well-formed or known entity is left as it is.
 *
 * @param string $str Input string (UTF-8)
 * @param boolean $alsoStdHtmlEnt If set, named HTML entities are converted as well
 * @return string String with entities replaced by UTF-8 characters
 */
function entities_to_utf8($str,$alsoStdHtmlEnt=0) {
    $trans_tbl = array();
    if ($alsoStdHtmlEnt) {
        // entity => UTF-8 character
        $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'UTF-8'));
    }

    // Odd indexes hold the entity names (text between "&" and ";"),
    // even indexes the text in between.
    $parts = preg_split('/&([#[:alnum:]]*);/', $str, -1, PREG_SPLIT_DELIM_CAPTURE);
    if (!is_array($parts)) {
        return $str;
    }

    foreach ($parts as $k => $v) {
        if (!($k % 2)) {
            continue;
        }

        $entity = '&'.$v.';';
        $match = array();
        if (preg_match('/^#[xX]([0-9a-fA-F]+)$/', $v, $match)) {
            // Hexadecimal numeric entity
            $parts[$k] = $this->UnumberToChar(hexdec($match[1]));
        } elseif (preg_match('/^#([0-9]+)$/', $v, $match)) {
            // Decimal numeric entity
            $parts[$k] = $this->UnumberToChar((int)$match[1]);
        } elseif (isset($trans_tbl[$entity])) {
            // Named entity
            $parts[$k] = $trans_tbl[$entity];
        } else {
            // Not an entity we know: restore the original text
            $parts[$k] = $entity;
        }
    }

    return implode('', $parts);
}
00827
00828
00829
00830
00831
00832
00833
00834
00835
/**
 * Splits a UTF-8 string into an array with one element per character.
 *
 * Elements are Unicode numbers by default, or the characters themselves when
 * $retChar is set. A stray continuation byte yields the "no char" value.
 *
 * @param string $str UTF-8 input string
 * @param boolean $convEntities If set, numeric and named HTML entities are converted to UTF-8 first
 * @param boolean $retChar If set, characters (strings) are returned instead of Unicode numbers
 * @return array One element per character
 */
function utf8_to_numberarray($str,$convEntities=0,$retChar=0) {
    if ($convEntities) {
        $str = $this->entities_to_utf8($str, 1);
    }

    $length = strlen($str);
    $characters = array();
    $pos = 0;

    while ($pos < $length) {
        $byte = substr($str, $pos, 1);
        $code = ord($byte);
        $pos++;

        // ASCII
        if ($code <= 127) {
            $characters[] = $retChar ? $byte : $code;
            continue;
        }

        // 10xxxxxx cannot start a character
        if (!($code & 64)) {
            $characters[] = $retChar ? chr($this->noCharByteVal) : $this->noCharByteVal;
            continue;
        }

        // Collect the continuation bytes announced by the first byte
        $sequence = $byte;
        for ($shift = 0; $shift < 8; $shift++) {
            $code = $code << 1;
            if (!($code & 128)) {
                break;
            }
            $sequence .= substr($str, $pos, 1);
            $pos++;
        }

        $characters[] = $retChar ? $sequence : $this->utf8CharToUnumber($sequence);
    }

    return $characters;
}
00866
00867
00868
00869
00870
00871
00872
00873
00874
00875
00876
00877
00878
00879
00880
00881
00882
00883
00884
00885
/**
 * Converts a Unicode number to its UTF-8 byte sequence.
 *
 * Numbers below 0x80 give one byte; larger numbers give two to six bytes
 * (the original UTF-8 scheme up to 31 bits). Numbers of 0x80000000 and above
 * give the "no char" byte.
 *
 * @param integer $cbyte Unicode number
 * @return string UTF-8 byte sequence
 */
function UnumberToChar($cbyte) {
    if ($cbyte < 0x80) {
        return chr($cbyte);
    }

    // Pick the marker of the leading byte and the number of trailing bytes
    if ($cbyte < 0x800) {
        $marker = 0xC0;
        $trailing = 1;
    } elseif ($cbyte < 0x10000) {
        $marker = 0xE0;
        $trailing = 2;
    } elseif ($cbyte < 0x200000) {
        $marker = 0xF0;
        $trailing = 3;
    } elseif ($cbyte < 0x4000000) {
        $marker = 0xF8;
        $trailing = 4;
    } elseif ($cbyte < 0x80000000) {
        $marker = 0xFC;
        $trailing = 5;
    } else {
        return chr($this->noCharByteVal);
    }

    // Leading byte carries the top bits, every trailing byte six more bits
    $bytes = chr($marker | ($cbyte >> (6 * $trailing)));
    for ($n = $trailing - 1; $n >= 0; $n--) {
        $bytes .= chr(0x80 | (($cbyte >> (6 * $n)) & 0x3F));
    }

    return $bytes;
}
00921
00922
00923
00924
00925
00926
00927
00928
00929
00930
/**
 * Converts a single UTF-8 character to its Unicode number.
 *
 * Only the first character of $str is looked at. If the first byte is not
 * the start of a multi-byte sequence, its byte value is the result.
 *
 * @param string $str UTF-8 character (one to six bytes)
 * @param boolean $hex If set, the number is returned as "x" followed by lower-case hexadecimal digits
 * @return mixed Unicode number, or the "x..." string when $hex is set
 */
function utf8CharToUnumber($str,$hex=0) {
    $lead = ord(substr($str, 0, 1));

    if (($lead & 192) != 192) {
        // Single byte
        $number = $lead;
    } else {
        // Collect the six payload bits of each announced continuation byte
        $shifted = $lead;
        $payload = '';
        $count = 0;
        while ($count < 8) {
            $shifted = $shifted << 1;
            if (!($shifted & 128)) {
                break;
            }
            $payload .= substr('00000000'.decbin(ord(substr($str, $count + 1, 1))), -6);
            $count++;
        }

        // The leading byte contributes its lowest (6 - count) bits
        $leadBits = substr('00000000'.decbin($lead), -(6 - $count));
        $number = bindec($leadBits.$payload);
    }

    return $hex ? 'x'.dechex($number) : $number;
}
00949
00950
00951
00952
00953
00954
00955
00956
00957
00958
00959
00960
00961
00962
00963
00964
00965
00966
00967
00968
00969
00970
00971
00972
00973
/**
 * Makes sure the conversion table of a charset is available in
 * $this->parsedCharsets[$charset].
 *
 * The table is taken from memory, from the cache file
 * typo3temp/cs/charset_<charset>.tbl, or parsed from
 * PATH_t3lib/csconvtbl/<charset>.tbl. A freshly parsed table is written to
 * the cache file. A cache file that does not unserialize to an array is
 * ignored and the table is rebuilt from the source file.
 *
 * Only codes above 127 are stored, in two maps:
 *  'local': charset code => UTF-8 string
 *  'utf8':  UTF-8 string => charset code
 *
 * @param string $charset Charset name (lower case, canonical name)
 * @return mixed 1 if the table was already in memory, 2 if it was loaded from cache or parsed, FALSE if no table file exists for the charset
 */
function initCharset($charset) {
    // Only process if the charset is not yet loaded
    if (isset($this->parsedCharsets[$charset]) && is_array($this->parsedCharsets[$charset])) {
        return 1;
    }

    // Conversion table file for the charset
    $charsetConvTableFile = PATH_t3lib.'csconvtbl/'.$charset.'.tbl';

    // The charset name ends up in a file name, so the path is validated first
    if (!($charset && t3lib_div::validPathStr($charsetConvTableFile) && @is_file($charsetConvTableFile))) {
        return false;
    }

    // Use the cached table if there is a valid one
    // NOTE(review): unserialize() trusts the content of typo3temp/ - make sure
    // that directory is not writable by untrusted parties.
    $cacheFile = t3lib_div::getFileAbsFileName('typo3temp/cs/charset_'.$charset.'.tbl');
    if ($cacheFile && @is_file($cacheFile)) {
        $cached = unserialize(t3lib_div::getUrl($cacheFile));
        if (is_array($cached)) {
            $this->parsedCharsets[$charset] = $cached;
            return 2;
        }
        // Unreadable cache: rebuild below
    }

    // Parse the conversion table
    $lines = t3lib_div::trimExplode(LF, t3lib_div::getUrl($charsetConvTableFile), 1);
    $this->parsedCharsets[$charset] = array('local' => array(), 'utf8' => array());

    // Two file layouts are supported: "0xNN <space> 0xNNNN <space> ..." lines
    // ('whitespaced') and "NN=U+NNNN:..." lines ('ms-token'). The layout is
    // detected on the first data line and assumed for the rest of the file.
    $whitespacedPattern = '/[[:space:]]*0x([[:alnum:]]*)[[:space:]]+0x([[:alnum:]]*)[[:space:]]+/';
    $detectedType = '';

    foreach ($lines as $value) {
        // Skip empty lines and comments
        if (!trim($value) || substr($value, 0, 1) == '#') {
            continue;
        }

        if (!$detectedType) {
            $detectedType = preg_match($whitespacedPattern, $value) ? 'whitespaced' : 'ms-token';
        }

        if ($detectedType == 'ms-token') {
            $tokens = preg_split('/[=:]/', $value, 3);
            $hexbyte = $tokens[0];
            $utf8 = isset($tokens[1]) ? $tokens[1] : '';
        } else {
            $regA = array();
            if (!preg_match($whitespacedPattern, $value, $regA)) {
                // Line does not follow the detected layout
                continue;
            }
            $hexbyte = $regA[1];
            $utf8 = 'U+'.$regA[2];
        }

        $decval = hexdec(trim($hexbyte));
        if ($decval > 127) {
            // Strip the "U+" prefix before reading the hexadecimal number
            $utf8decval = hexdec(substr(trim($utf8), 2));
            $this->parsedCharsets[$charset]['local'][$decval] = $this->UnumberToChar($utf8decval);
            $this->parsedCharsets[$charset]['utf8'][$this->parsedCharsets[$charset]['local'][$decval]] = $decval;
        }
    }

    if ($cacheFile) {
        t3lib_div::writeFileToTypo3tempDir($cacheFile, serialize($this->parsedCharsets[$charset]));
    }

    return 2;
}
01026
01027
01028
01029
01030
01031
01032
01033
01034
01035
/**
 * Builds the UTF-8 case folding table ($this->caseFolding['utf-8']) and the
 * UTF-8 to ASCII transliteration table ($this->toASCII['utf-8']).
 *
 * With $mode 'case' or 'ascii' the requested table is first looked for in
 * memory and then in its cache file below typo3temp/cs/. Otherwise both
 * tables are rebuilt from PATH_t3lib/unidata/UnicodeData.txt, refined with
 * SpecialCasing.txt and Translit.txt from the same directory when present,
 * and both are written to their cache files.
 *
 * Changes to the former implementation: string offsets no longer use the
 * curly-brace syntax (a parse error since PHP 8.0), the FALSE returned by
 * fgets() at end of file is not treated as a record, incomplete records do
 * not raise undefined offset warnings, the "U+" prefix is stripped before
 * hexdec() and a cache file that does not unserialize to an array is ignored.
 *
 * @param string $mode 'case' (case folding) or 'ascii' (transliteration); anything else forces a rebuild
 * @return mixed 1 if the requested table was in memory, 2 if it came from the cache, 3 if the tables were rebuilt, FALSE if UnicodeData.txt cannot be read
 */
function initUnicodeData($mode=null) {
    // Cache files
    $cacheFileCase = t3lib_div::getFileAbsFileName('typo3temp/cs/cscase_utf-8.tbl');
    $cacheFileASCII = t3lib_div::getFileAbsFileName('typo3temp/cs/csascii_utf-8.tbl');

    // Only process if the requested table is not available yet
    switch ($mode) {
        case 'case':
            if (isset($this->caseFolding['utf-8']) && is_array($this->caseFolding['utf-8'])) return 1;

            if ($cacheFileCase && @is_file($cacheFileCase)) {
                $cached = unserialize(t3lib_div::getUrl($cacheFileCase));
                if (is_array($cached)) {
                    $this->caseFolding['utf-8'] = $cached;
                    return 2;
                }
            }
            break;

        case 'ascii':
            if (isset($this->toASCII['utf-8']) && is_array($this->toASCII['utf-8'])) return 1;

            if ($cacheFileASCII && @is_file($cacheFileASCII)) {
                $cached = unserialize(t3lib_div::getUrl($cacheFileASCII));
                if (is_array($cached)) {
                    $this->toASCII['utf-8'] = $cached;
                    return 2;
                }
            }
            break;
    }

    // Process the main Unicode data file
    $unicodeDataFile = PATH_t3lib.'unidata/UnicodeData.txt';
    if (!(t3lib_div::validPathStr($unicodeDataFile) && @is_file($unicodeDataFile))) return false;

    $fh = fopen($unicodeDataFile, 'rb');
    if (!$fh) return false;

    // Keys and values of the case folding table are UTF-8 strings
    $this->caseFolding['utf-8'] = array();
    $utf8CaseFolding =& $this->caseFolding['utf-8'];
    $utf8CaseFolding['toUpper'] = array();
    $utf8CaseFolding['toLower'] = array();
    $utf8CaseFolding['toTitle'] = array();

    $decomposition = array();    // "U+XXXX" => list of hex code points
    $mark = array();             // "U+XXXX" => 1 for characters of a category starting with "M"
    $number = array();           // "U+XXXX" => numeric value for categories starting with "N"
    $omit = array();             // "U+XXXX" => 1 for characters that transliterate to nothing

    while (!feof($fh)) {
        $line = fgets($fh, 4096);
        if ($line === false) break;

        $line = rtrim($line);
        if ($line === '') continue;

        // Semicolon separated record; missing trailing fields read as ''
        $fields = array_pad(explode(';', $line), 15, '');
        $char = $fields[0];
        $name = $fields[1];
        $cat = $fields[2];
        $decomp = $fields[5];
        $num = $fields[8];
        $upper = $fields[12];
        $lower = $fields[13];
        $title = $fields[14];

        // Only code points up to 0xFFFF are processed; reading stops at the first larger one
        $ord = hexdec($char);
        if ($ord > 0xFFFF) break;

        $utf8_char = $this->UnumberToChar($ord);

        if ($upper) $utf8CaseFolding['toUpper'][$utf8_char] = $this->UnumberToChar(hexdec($upper));
        if ($lower) $utf8CaseFolding['toLower'][$utf8_char] = $this->UnumberToChar(hexdec($lower));
        // Title case is only stored when it differs from upper case
        if ($title && $title != $upper) $utf8CaseFolding['toTitle'][$utf8_char] = $this->UnumberToChar(hexdec($title));

        switch (substr($cat, 0, 1)) {
            case 'M':
                $mark["U+$char"] = 1;
                break;

            case 'N':
                if ($ord > 0x80 && $num != '') $number["U+$char"] = $num;
                break;
        }

        // Latin letters "WITH ..." that have no decomposition map to their base letter
        $match = array();
        if (preg_match('/^LATIN (SMALL|CAPITAL) LETTER ([A-Z]) WITH/', $name, $match) && !$decomp) {
            $c = ord($match[2]);
            if ($match[1] == 'SMALL') $c += 32;

            $decomposition["U+$char"] = array(dechex($c));
            continue;
        }

        $match = array();
        if (preg_match('/(<.*>)? *(.+)/', $decomp, $match)) {
            switch ($match[1]) {
                case '<circle>':
                    // Add parentheses (0028, 0029)
                    $match[2] = '0028 '.$match[2].' 0029';
                    break;

                case '<square>':
                    // Add square brackets (005B, 005D)
                    $match[2] = '005B '.$match[2].' 005D';
                    break;

                case '<compat>':
                    // Ignore decompositions that start with a space (0020)
                    if (preg_match('/^0020 /', $match[2])) continue 2;
                    break;

                // Ignore these decomposition types
                case '<initial>':
                case '<medial>':
                case '<final>':
                case '<isolated>':
                case '<vertical>':
                    continue 2;
            }
            $decomposition["U+$char"] = explode(' ', $match[2]);
        }
    }
    fclose($fh);

    // Process additional, unconditional casing rules
    $specialCasingFile = PATH_t3lib.'unidata/SpecialCasing.txt';
    if (t3lib_div::validPathStr($specialCasingFile) && @is_file($specialCasingFile)) {
        $fh = fopen($specialCasingFile, 'rb');
        if ($fh) {
            while (!feof($fh)) {
                $line = fgets($fh, 4096);
                if ($line === false) break;

                if (substr($line, 0, 1) != '#' && trim($line) != '') {
                    $fields = array_pad(t3lib_div::trimExplode(';', $line), 5, '');
                    $char = $fields[0];
                    $lower = $fields[1];
                    $title = $fields[2];
                    $upper = $fields[3];
                    $cond = $fields[4];

                    // Rules with a condition are skipped; a field starting with "#" is a comment
                    if ($cond == '' || substr($cond, 0, 1) == '#') {
                        $utf8_char = $this->UnumberToChar(hexdec($char));
                        if ($char != $lower) {
                            $arr = explode(' ', $lower);
                            for ($i = 0; isset($arr[$i]); $i++) $arr[$i] = $this->UnumberToChar(hexdec($arr[$i]));
                            $utf8CaseFolding['toLower'][$utf8_char] = implode('', $arr);
                        }
                        if ($char != $title && $title != $upper) {
                            $arr = explode(' ', $title);
                            for ($i = 0; isset($arr[$i]); $i++) $arr[$i] = $this->UnumberToChar(hexdec($arr[$i]));
                            $utf8CaseFolding['toTitle'][$utf8_char] = implode('', $arr);
                        }
                        if ($char != $upper) {
                            $arr = explode(' ', $upper);
                            for ($i = 0; isset($arr[$i]); $i++) $arr[$i] = $this->UnumberToChar(hexdec($arr[$i]));
                            $utf8CaseFolding['toUpper'][$utf8_char] = implode('', $arr);
                        }
                    }
                }
            }
            fclose($fh);
        }
    }

    // Process custom transliterations; they override the decompositions found so far
    $customTranslitFile = PATH_t3lib.'unidata/Translit.txt';
    if (t3lib_div::validPathStr($customTranslitFile) && @is_file($customTranslitFile)) {
        $fh = fopen($customTranslitFile, 'rb');
        if ($fh) {
            while (!feof($fh)) {
                $line = fgets($fh, 4096);
                if ($line === false) break;

                if (substr($line, 0, 1) != '#' && trim($line) != '') {
                    $fields = array_pad(t3lib_div::trimExplode(';', $line), 2, '');
                    $char = $fields[0];
                    $translit = $fields[1];

                    if (!$translit) $omit["U+$char"] = 1;
                    $decomposition["U+$char"] = explode(' ', $translit);
                }
            }
            fclose($fh);
        }
    }

    // Decompose and remove marks; entries for characters that end up empty
    // are dropped unless the character is flagged to be omitted
    foreach ($decomposition as $from => $to) {
        $code_decomp = array();

        while ($code_value = array_shift($to)) {
            if (isset($decomposition["U+$code_value"])) {
                // Resolve nested decompositions, keeping their order
                foreach (array_reverse($decomposition["U+$code_value"]) as $cv) {
                    array_unshift($to, $cv);
                }
            } elseif (!isset($mark["U+$code_value"])) {
                // Marks are removed
                array_push($code_decomp, $code_value);
            }
        }
        if (count($code_decomp) || isset($omit[$from])) {
            $decomposition[$from] = $code_decomp;
        } else {
            unset($decomposition[$from]);
        }
    }

    // Create the ASCII-only mapping
    $this->toASCII['utf-8'] = array();
    $ascii =& $this->toASCII['utf-8'];

    foreach ($decomposition as $from => $to) {
        $code_decomp = array();
        while ($code_value = array_shift($to)) {
            $ord = hexdec($code_value);
            if ($ord > 127) {
                // Skip decompositions containing non-ASCII characters
                continue 2;
            }
            array_push($code_decomp, chr($ord));
        }
        // $from is "U+XXXX"; only the hexadecimal part is converted
        $ascii[$this->UnumberToChar(hexdec(substr($from, 2)))] = join('', $code_decomp);
    }

    // Add numeric values for characters that have no decomposition
    foreach ($number as $from => $to) {
        $utf8_char = $this->UnumberToChar(hexdec(substr($from, 2)));
        if (!isset($ascii[$utf8_char])) {
            $ascii[$utf8_char] = $to;
        }
    }

    if ($cacheFileCase) {
        t3lib_div::writeFileToTypo3tempDir($cacheFileCase, serialize($utf8CaseFolding));
    }

    if ($cacheFileASCII) {
        t3lib_div::writeFileToTypo3tempDir($cacheFileASCII, serialize($ascii));
    }

    return 3;
}
01252
01253
01254
01255
01256
01257
01258
01259
01260
/**
 * Makes sure the case folding table of a charset is available in
 * $this->caseFolding[$charset].
 *
 * The table is taken from memory, from the cache file
 * typo3temp/cs/cscase_<charset>.tbl, or derived from the UTF-8 case folding
 * table by converting every character of the charset's conversion table.
 * ASCII letters are always added. A cache file that does not unserialize to
 * an array is ignored and the table is rebuilt.
 *
 * Characters without a case mapping are skipped explicitly, which avoids
 * undefined index warnings and conversions of NULL.
 *
 * @param string $charset Charset name (lower case, canonical name)
 * @return mixed 1 if the table was in memory, 2 if it came from the cache, 3 if it was built, FALSE if the charset or the Unicode data is unavailable
 */
function initCaseFolding($charset) {
    // Only process if the case table is not yet loaded
    if (isset($this->caseFolding[$charset]) && is_array($this->caseFolding[$charset])) return 1;

    // Use the cached version if there is a valid one
    $cacheFile = t3lib_div::getFileAbsFileName('typo3temp/cs/cscase_'.$charset.'.tbl');
    if ($cacheFile && @is_file($cacheFile)) {
        $cached = unserialize(t3lib_div::getUrl($cacheFile));
        if (is_array($cached)) {
            $this->caseFolding[$charset] = $cached;
            return 2;
        }
    }

    // The conversion table of the charset is needed
    if (!$this->initCharset($charset)) {
        return false;
    }

    // ... and the UTF-8 case folding table
    if (!$this->initUnicodeData('case')) {
        return false;
    }

    $nochar = chr($this->noCharByteVal);
    $directions = array('toUpper', 'toLower', 'toTitle');

    foreach ($this->parsedCharsets[$charset]['local'] as $utf8) {
        // The character in the target charset
        $c = $this->utf8_decode($utf8, $charset);

        foreach ($directions as $direction) {
            if (!isset($this->caseFolding['utf-8'][$direction][$utf8])) {
                continue;
            }

            // Only keep mappings whose result exists in the target charset
            $cc = $this->utf8_decode($this->caseFolding['utf-8'][$direction][$utf8], $charset);
            if ($cc != '' && $cc != $nochar) {
                $this->caseFolding[$charset][$direction][$c] = $cc;
            }
        }
    }

    // Add the ASCII case table (the conversion tables only hold codes above 127)
    for ($i = ord('a'); $i <= ord('z'); $i++) {
        $this->caseFolding[$charset]['toUpper'][chr($i)] = chr($i - 32);
    }
    for ($i = ord('A'); $i <= ord('Z'); $i++) {
        $this->caseFolding[$charset]['toLower'][chr($i)] = chr($i + 32);
    }

    if ($cacheFile) {
        t3lib_div::writeFileToTypo3tempDir($cacheFile, serialize($this->caseFolding[$charset]));
    }

    return 3;
}
01314
01315
01316
01317
01318
01319
01320
01321
01322
/**
 * Makes sure the ASCII transliteration table of a charset is available in
 * $this->toASCII[$charset].
 *
 * The table is taken from memory, from the cache file
 * typo3temp/cs/csascii_<charset>.tbl, or derived from the UTF-8
 * transliteration table for every character of the charset's conversion
 * table. A cache file that does not unserialize to an array is ignored.
 *
 * The table is initialized as an empty array before it is filled, so a
 * charset without any transliterable character is stored (and cached) as an
 * empty table instead of NULL.
 *
 * @param string $charset Charset name (lower case, canonical name)
 * @return mixed 1 if the table was in memory, 2 if it came from the cache, 3 if it was built, FALSE if the charset or the Unicode data is unavailable
 */
function initToASCII($charset) {
    // Only process if the table is not yet loaded
    if (isset($this->toASCII[$charset]) && is_array($this->toASCII[$charset])) return 1;

    // Use the cached version if there is a valid one
    $cacheFile = t3lib_div::getFileAbsFileName('typo3temp/cs/csascii_'.$charset.'.tbl');
    if ($cacheFile && @is_file($cacheFile)) {
        $cached = unserialize(t3lib_div::getUrl($cacheFile));
        if (is_array($cached)) {
            $this->toASCII[$charset] = $cached;
            return 2;
        }
    }

    // The conversion table of the charset is needed
    if (!$this->initCharset($charset)) {
        return false;
    }

    // ... and the UTF-8 transliteration table
    if (!$this->initUnicodeData('ascii')) {
        return false;
    }

    $this->toASCII[$charset] = array();
    foreach ($this->parsedCharsets[$charset]['local'] as $utf8) {
        if (isset($this->toASCII['utf-8'][$utf8])) {
            // Key is the character in the target charset
            $c = $this->utf8_decode($utf8, $charset);
            $this->toASCII[$charset][$c] = $this->toASCII['utf-8'][$utf8];
        }
    }

    if ($cacheFile) {
        t3lib_div::writeFileToTypo3tempDir($cacheFile, serialize($this->toASCII[$charset]));
    }

    return 3;
}
01360
01361
01362
01363
01364
01365
01366
01367
01368
01369
01370
01371
01372
01373
01374
01375
01376
01377
01378
01379
01380
01381
01382
01383
01384
01385
01386
01387
01388
01389
01390
01391
01392
01393
/**
 * Returns a portion of a string, with $start and $len counted in characters of $charset.
 *
 * Uses mbstring or iconv when $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] selects
 * one of them; otherwise the class' own UTF-8/EUC routines, byte arithmetic for the
 * fixed-width charsets, or plain substr() for everything else.
 *
 * @param	string		Charset name, used verbatim for the lookups (e.g. 'utf-8')
 * @param	string		String to cut
 * @param	integer		Start position (in characters)
 * @param	integer		Length (in characters); NULL means "up to the end of the string"
 * @return	string		The substring (utf8_substr()/euc_substr() return FALSE when $start lies outside the string)
 * @see substr(), mb_substr()
 */
function substr($charset,$string,$start,$len=null) {
	if ($len === 0 || $string === '') {
		return '';
	}

	if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') {
		if ($len==null) {
			// mb_substr() is called without a length here, so the charset has to be
			// passed through the internal encoding, which is restored afterwards
			$enc = mb_internal_encoding();
			mb_internal_encoding($charset);
			$str = mb_substr($string,$start);
			mb_internal_encoding($enc);

			return $str;
		}
		return mb_substr($string,$start,$len,$charset);
	} elseif ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'iconv') {
		if ($len==null) {
			// Same approach as above for iconv_substr()
			$enc = iconv_get_encoding('internal_encoding');
			iconv_set_encoding('internal_encoding',$charset);
			$str = iconv_substr($string,$start);
			iconv_set_encoding('internal_encoding',$enc);

			return $str;
		}
		return iconv_substr($string,$start,$len,$charset);
	} elseif ($charset == 'utf-8') {
		return $this->utf8_substr($string,$start,$len);
	} elseif (!empty($this->eucBasedSets[$charset])) {
		return $this->euc_substr($string,$start,$charset,$len);
	} elseif (!empty($this->twoByteSets[$charset])) {
		// A NULL length must not be multiplied: NULL*2 is 0 and would yield an empty string
		return $len === NULL ? substr($string,$start*2) : substr($string,$start*2,$len*2);
	} elseif (!empty($this->fourByteSets[$charset])) {
		return $len === NULL ? substr($string,$start*4) : substr($string,$start*4,$len*4);
	}

	// Any other charset is handled byte-wise
	return $len === NULL ? substr($string,$start) : substr($string,$start,$len);
}
01438
01439
01440
01441
01442
01443
01444
01445
01446
01447
01448
/**
 * Counts the characters of a string in the given charset.
 *
 * @param	string		Charset name, used verbatim for the lookups (e.g. 'utf-8')
 * @param	string		String to measure
 * @return	integer		Number of characters as reported by the selected backend
 * @see strlen()
 */
function strlen($charset,$string) {
	$backend = $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'];

	// PHP extensions take precedence when configured
	if ($backend == 'mbstring') {
		return mb_strlen($string,$charset);
	}
	if ($backend == 'iconv') {
		return iconv_strlen($string,$charset);
	}

	// Variable-width charsets handled by this class
	if ($charset == 'utf-8') {
		return $this->utf8_strlen($string);
	}
	if ($this->eucBasedSets[$charset]) {
		return $this->euc_strlen($string,$charset);
	}

	// Fixed-width charsets: byte length divided by the character width
	if ($this->twoByteSets[$charset]) {
		return strlen($string)/2;
	}
	if ($this->fourByteSets[$charset]) {
		return strlen($string)/4;
	}

	// Everything else is counted byte-wise
	return strlen($string);
}
01466
01467
01468
01469
01470
01471
01472
01473
01474
01475
01476
/**
 * mbstring implementation of crop().
 *
 * @param	string		Charset name handed to the mb_* functions
 * @param	string		String to crop
 * @param	integer		Number of characters to keep: positive keeps the start of the string, negative keeps its end
 * @param	string		Marker added at the side where characters were removed
 * @return	string		The input string if $len is 0 or the string has at most abs($len) characters, otherwise the cropped string with the marker
 * @see mb_strlen(), mb_substr()
 */
protected function cropMbstring($charset, $string, $len, $crop = '') {
	if (intval($len) === 0) {
		return $string;
	}

	$charCount = mb_strlen($string, $charset);
	if ($charCount <= abs($len)) {
		// Nothing to cut off
		return $string;
	}

	if ($len > 0) {
		// Keep the first $len characters
		return mb_substr($string, 0, $len, $charset) . $crop;
	}

	// Keep the last abs($len) characters
	return $crop . mb_substr($string, $len, $charCount, $charset);
}
01490
01491
01492
01493
01494
01495
01496
01497
01498
01499
01500
01501
01502
/**
 * Crops a string to a number of characters and adds a marker at the cut side.
 *
 * A positive $len keeps the first $len characters and appends $crop; a negative
 * $len keeps the last abs($len) characters and prepends $crop. The string is
 * returned unchanged if $len is 0 or if there is nothing to cut off.
 *
 * @param	string		Charset name, used verbatim for the lookups (e.g. 'utf-8')
 * @param	string		String to crop
 * @param	integer		Number of characters to keep (see above for the sign)
 * @param	string		Marker added where characters were removed
 * @return	string		The cropped string
 */
function crop($charset,$string,$len,$crop='') {
	if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') {
		return $this->cropMbstring($charset, $string, $len, $crop);
	}

	if (intval($len) == 0) return $string;

	$byteCount = strlen($string);

	// Find the byte offset $i at which the string is to be cut
	if ($charset == 'utf-8') {
		$i = $this->utf8_char2byte_pos($string,$len);
	} elseif (!empty($this->eucBasedSets[$charset])) {
		$i = $this->euc_char2byte_pos($string,$len,$charset);
	} elseif ($len > 0) {
		// Treated as a single-byte charset: character position equals byte position
		$i = $len;
	} else {
		$i = $byteCount+$len;
		if ($i<=0) $i = false;
	}

	if ($i === false) {
		// The requested length covers the whole string
		return $string;
	}

	// The cut position is compared against the byte length explicitly; probing the
	// string with an out-of-range (or negative) offset is not reliable across PHP versions
	if ($len > 0) {
		if ($i < $byteCount) {
			return substr($string,0,$i).$crop;
		}
	} elseif ($i >= 1 && $i <= $byteCount) {
		return $crop.substr($string,$i);
	}

	return $string;
}
01549
01550
01551
01552
01553
01554
01555
01556
01557
01558
01559
/**
 * Truncates a string to at most $len bytes, delegating to charset-aware helpers
 * so that the cut does not fall inside a multi-byte character.
 *
 * @param	string		Charset name, used verbatim for the lookups (e.g. 'utf-8')
 * @param	string		String to truncate
 * @param	integer		Maximum length in bytes; values <= 0 give an empty string
 * @return	string		The truncated string
 * @see mb_strcut()
 */
function strtrunc($charset,$string,$len) {
	if ($len <= 0) return '';

	if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') {
		return mb_strcut($string,0,$len,$charset);
	} elseif ($charset == 'utf-8') {
		return $this->utf8_strtrunc($string,$len);
	} elseif (!empty($this->eucBasedSets[$charset])) {
		// euc_strtrunc() expects ($str, $len, $charset); the length must not be left out
		return $this->euc_strtrunc($string,$len,$charset);
	} elseif (!empty($this->twoByteSets[$charset])) {
		// Round down to a multiple of the character width
		if ($len % 2) $len--;
	} elseif (!empty($this->fourByteSets[$charset])) {
		$len -= $len % 4;
	}

	// Fixed-width charsets (with the adjusted length) and all other charsets
	return substr($string,0,$len);
}
01578
01579
01580
01581
01582
01583
01584
01585
01586
01587
01588
01589
01590
01591
01592
01593
/**
 * Converts the case of a string in the given charset.
 *
 * @param	string		Charset name, used verbatim for the lookups (e.g. 'utf-8')
 * @param	string		String to convert
 * @param	string		Case conversion key, e.g. 'toLower'. With mbstring every value other than 'toLower' converts to upper case; otherwise the value is passed on to the *_char_mapping() helper
 * @return	string		The converted string
 */
function conv_case($charset,$string,$case) {
	if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') {
		return ($case == 'toLower')
			? mb_strtolower($string,$charset)
			: mb_strtoupper($string,$charset);
	}

	if ($charset == 'utf-8') {
		return $this->utf8_char_mapping($string,'case',$case);
	}

	if (isset($this->eucBasedSets[$charset])) {
		return $this->euc_char_mapping($string,$charset,'case',$case);
	}

	// Any other charset goes through the single-byte mapping
	return $this->sb_char_mapping($string,$charset,'case',$case);
}
01612
01613
01614
01615
01616
01617
01618
01619
/**
 * Runs a string through the 'ascii' character mapping of its charset.
 *
 * @param	string		Charset name, used verbatim for the lookups (e.g. 'utf-8')
 * @param	string		String to convert
 * @return	string		Result of the utf8/euc/sb *_char_mapping() helper in 'ascii' mode
 */
function specCharsToASCII($charset,$string) {
	if ($charset == 'utf-8') {
		return $this->utf8_char_mapping($string,'ascii');
	}

	if (isset($this->eucBasedSets[$charset])) {
		return $this->euc_char_mapping($string,$charset,'ascii');
	}

	// Any other charset goes through the single-byte mapping
	return $this->sb_char_mapping($string,$charset,'ascii');
}
01632
01633
01634
01635
01636
01637
01638
01639
01640
01641
/**
 * Selects the language key that best matches a list of preferred language codes.
 *
 * @param	string		Comma-separated language codes, each optionally followed by ';q=<quality>' (presumably the value of an HTTP Accept-Language header - confirm against callers)
 * @return	string		A key of $this->charSetArray / $this->isoArray, or 'default' if nothing matched or the match is 'en'
 */
public function getPreferredClientLanguage($languageCodesList) {
	$allLanguageCodes = array();
	$selectedLanguage = 'default';

	// Every language key is registered under its own name first ...
	foreach ($this->charSetArray as $typo3Lang => $charSet) {
		$allLanguageCodes[$typo3Lang] = $typo3Lang;
	}

	// ... and replaced by the code from isoArray where one exists
	// ('_' becomes '-', e.g. 'xx_YY' => 'xx-YY')
	foreach ($this->isoArray as $typo3Lang => $isoLang) {
		$isoLang = join('-', explode('_', $isoLang));
		$allLanguageCodes[$typo3Lang] = $isoLang;
	}

	// Turn the map around: language code => language key
	$allLanguageCodes = array_flip($allLanguageCodes);

	$preferredLanguages = t3lib_div::trimExplode(',', $languageCodesList);

	// Collect code => quality; entries without ';q=' get quality 1.0
	$sortedPreferredLanguages = array();
	foreach ($preferredLanguages as $preferredLanguage) {
		$quality = 1.0;
		if (strpos($preferredLanguage, ';q=') !== false) {
			list($preferredLanguage, $quality) = explode(';q=', $preferredLanguage);
		}
		$sortedPreferredLanguages[$preferredLanguage] = $quality;
	}

	// Highest quality first
	arsort($sortedPreferredLanguages, SORT_NUMERIC);
	foreach ($sortedPreferredLanguages as $preferredLanguage => $quality) {
		if (isset($allLanguageCodes[$preferredLanguage])) {
			$selectedLanguage = $allLanguageCodes[$preferredLanguage];
			break;
		}

		// No match for the full code: retry with the part before the first '-'.
		// Only element 0 is read, so codes without a '-' no longer touch an undefined offset.
		$languageParts = explode('-', $preferredLanguage, 2);
		$primaryLanguage = $languageParts[0];
		if (isset($allLanguageCodes[$primaryLanguage])) {
			$selectedLanguage = $allLanguageCodes[$primaryLanguage];
			break;
		}
	}

	if (!$selectedLanguage || $selectedLanguage == 'en') {
		$selectedLanguage = 'default';
	}
	return $selectedLanguage;
}
01694
01695
01696
01697
01698
01699
01700
01701
01702
01703
01704
01705
01706
01707
01708
01709
01710
01711
01712
01713
01714
01715
01716
01717
01718
01719
/**
 * Maps every byte of a string through a case-folding or to-ASCII table of a charset.
 *
 * @param	string		String to convert; processed byte by byte
 * @param	string		Charset name
 * @param	string		Mode: 'case' (case folding) or 'ascii' (to-ASCII mapping)
 * @param	string		For mode 'case': key of the case table to use, e.g. 'toLower'
 * @return	string		The converted string; the input is returned unchanged for an unknown mode or if the table could not be initialised
 */
function sb_char_mapping($str,$charset,$mode,$opt='') {
	switch($mode) {
		case 'case':
			if (!$this->initCaseFolding($charset)) return $str;
			$map =& $this->caseFolding[$charset][$opt];
		break;

		case 'ascii':
			if (!$this->initToASCII($charset)) return $str;
			$map =& $this->toASCII[$charset];
		break;

		default:
			return $str;
	}

	// Iterate up to the byte length instead of probing for an out-of-range offset
	$str = (string)$str;
	$byteCount = strlen($str);
	$out = '';
	for ($i = 0; $i < $byteCount; $i++) {
		$c = $str[$i];
		// Bytes without an entry in the table are copied unchanged
		$out .= isset($map[$c]) ? $map[$c] : $c;
	}

	return $out;
}
01748
01749
01750
01751
01752
01753
01754
01755
01756
01757
01758
01759
01760
01761
01762
01763
01764
01765
01766
01767
01768
01769
01770
01771
01772
01773
01774
/**
 * Returns a portion of a UTF-8 string, with $start and $len counted in characters.
 *
 * @param	string		UTF-8 string
 * @param	integer		Start position (in characters); negative values count from the end
 * @param	integer		Length (in characters); NULL means "up to the end of the string"
 * @return	string		The substring, or FALSE if a positive $start lies outside the string
 * @see substr()
 */
function utf8_substr($str,$start,$len=null) {
	// A length of 0 (integer or string) gives an empty string; NULL is "no length" and is not compared
	if ($len !== null && !strcmp($len,'0')) return '';

	$byte_start = $this->utf8_char2byte_pos($str,$start);
	if ($byte_start === false) {
		if ($start > 0) {
			// Start position is behind the end of the string
			return false;
		}
		// A start of 0 or a negative start reaching before the string means "from the first byte"
		$byte_start = 0;
	}

	$str = substr($str,$byte_start);

	if ($len!=null) {
		$byte_end = $this->utf8_char2byte_pos($str,$len);
		if ($byte_end === false) {
			// The length reaches beyond the remaining string: a negative length
			// leaves nothing, a positive one leaves everything
			return $len<0 ? '' : $str;
		}
		return substr($str,0,$byte_end);
	}

	return $str;
}
01798
01799
01800
01801
01802
01803
01804
01805
01806
01807
/**
 * Counts the characters of a UTF-8 string.
 *
 * @param	string		UTF-8 string
 * @return	integer		Number of bytes that are not continuation bytes (10xxxxxx), i.e. the number of character start bytes
 * @see strlen()
 */
function utf8_strlen($str) {
	$str = (string)$str;
	$byteCount = strlen($str);
	$n = 0;
	for ($i = 0; $i < $byteCount; $i++) {
		// Single-byte characters (0xxxxxxx) and multi-byte start bytes (11xxxxxx) are counted
		if ((ord($str[$i]) & 0xC0) != 0x80) {
			$n++;
		}
	}
	return $n;
}
01819
01820
01821
01822
01823
01824
01825
01826
01827
01828
/**
 * Truncates a UTF-8 string to at most $len bytes without cutting a multi-byte character in half.
 *
 * @param	string		UTF-8 string
 * @param	integer		Maximum length in bytes
 * @return	string		The truncated string
 * @see mb_strcut()
 */
function utf8_strtrunc($str,$len) {
	$byteCount = strlen($str);
	$i = $len-1;

	// Only look at the last byte inside the limit if it exists and belongs to a multi-byte sequence
	if ($i >= 0 && $i < $byteCount && (ord($str[$i]) & 0x80)) {
		// Walk back to the start byte of the sequence (the bit 0x40 is set in start bytes only)
		for (; $i>0 && !(ord($str[$i]) & 0x40); $i--) ;

		// No start byte found: the string begins with continuation bytes.
		// A start byte at offset 0 is fine and must not be rejected here.
		if (!(ord($str[$i]) & 0x40)) return '';

		// The number of leading 1 bits of the start byte is the length of the sequence
		for ($bc=0, $mbs=ord($str[$i]); $mbs & 0x80; $mbs = $mbs << 1) $bc++;

		// Sequence does not fit into $len bytes: cut in front of it
		if ($bc+$i > $len) return substr($str,0,$i);

		// Otherwise the whole character fits and the regular cut applies
	}
	return substr($str,0,$len);
}
01840
01841
01842
01843
01844
01845
01846
01847
01848
01849
01850
/**
 * Finds the first occurrence of $needle in a UTF-8 string, counted in characters.
 *
 * @param	string		UTF-8 string to search in
 * @param	string		UTF-8 string to search for
 * @param	integer		Character position to start the search at
 * @return	integer		Character position of the match, or FALSE if there is none or the offset lies outside the string
 * @see strpos()
 */
function utf8_strpos($haystack,$needle,$offset=0) {
	// Hand over to a PHP extension when one is configured
	switch ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils']) {
		case 'mbstring':
			return mb_strpos($haystack,$needle,$offset,'utf-8');
		case 'iconv':
			return iconv_strpos($haystack,$needle,$offset,'utf-8');
	}

	// Search byte-wise, converting the offset before and the result after
	$startByte = $this->utf8_char2byte_pos($haystack,$offset);
	if ($startByte !== false) {
		$matchByte = strpos($haystack,$needle,$startByte);
		if ($matchByte !== false) {
			return $this->utf8_byte2char_pos($haystack,$matchByte);
		}
	}

	return false;
}
01866
01867
01868
01869
01870
01871
01872
01873
01874
01875
/**
 * Finds the last occurrence of $needle in a UTF-8 string, counted in characters.
 *
 * @param	string		UTF-8 string to search in
 * @param	string		UTF-8 string to search for
 * @return	integer		Character position of the match, or FALSE if there is none
 * @see strrpos()
 */
function utf8_strrpos($haystack,$needle) {
	if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') {
		// The third argument of mb_strrpos() is the offset; the encoding is the fourth
		return mb_strrpos($haystack,$needle,0,'utf-8');
	} elseif ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'iconv') {
		return iconv_strrpos($haystack,$needle,'utf-8');
	}

	// Search byte-wise and convert the result to a character position
	$byte_pos = strrpos($haystack,$needle);
	if ($byte_pos === false) return false;

	return $this->utf8_byte2char_pos($haystack,$byte_pos);
}
01888
01889
01890
01891
01892
01893
01894
01895
01896
01897
/**
 * Translates a character position into a byte position of a UTF-8 string.
 *
 * @param	string		UTF-8 string
 * @param	integer		Character position; negative values count from the end of the string
 * @return	integer		Byte position, or FALSE if the scan left the string before or exactly when abs($pos) characters were counted
 */
function utf8_char2byte_pos($str,$pos) {
	$str = (string)$str;
	$byteCount = strlen($str);
	$n = 0;				// characters counted so far
	$p = abs($pos);		// characters to count

	// Scan forward from the first byte or backward from the last one
	if ($pos >= 0) {
		$i = 0;
		$d = 1;
	} else {
		$i = $byteCount-1;
		$d = -1;
	}

	// The bounds are checked explicitly: probing with a negative offset would wrap
	// round to the end of the string on PHP versions that support negative offsets
	for ( ; $i >= 0 && $i < $byteCount && $n<$p; $i+=$d) {
		// Every byte that is not a continuation byte (10xxxxxx) starts a character
		if ((ord($str[$i]) & 0xC0) != 0x80) {
			$n++;
		}
	}
	if ($i < 0 || $i >= $byteCount) return false;

	if ($pos >= 0) {
		// Skip the continuation bytes of the last counted character
		while ($i < $byteCount && (ord($str[$i]) & 0xC0) == 0x80) { $i++; }
	} else {
		// The backward scan stopped one byte in front of the last counted start byte
		$i++;
	}

	return $i;
}
01929
01930
01931
01932
01933
01934
01935
01936
01937
01938
/**
 * Translates a byte position into a character position of a UTF-8 string.
 *
 * Counts the bytes at the positions 1..$pos that are not continuation bytes. If
 * byte 0 and byte $pos both start a character, that equals the number of
 * characters in front of $pos.
 *
 * @param	string		UTF-8 string
 * @param	integer		Byte position
 * @return	integer		Character position, or FALSE for an empty string or a position outside the string
 */
function utf8_byte2char_pos($str,$pos) {
	$str = (string)$str;
	$byteCount = strlen($str);

	// Validate the position up front; after the loop below $i is always 0, so a
	// check at that point can only detect an empty string
	if ($byteCount == 0 || $pos < 0 || $pos > $byteCount) return false;

	$n = 0;		// number of characters
	for ($i = $pos; $i > 0; $i--) {
		// The position directly behind the last byte is counted like a character start,
		// so that $pos == strlen($str) gives the total number of characters
		if ($i >= $byteCount || (ord($str[$i]) & 0xC0) != 0x80) {
			$n++;
		}
	}

	return $n;
}
01952
01953
01954
01955
01956
01957
01958
01959
01960
01961
/**
 * Maps every character of a UTF-8 string through a case-folding or to-ASCII table.
 *
 * @param	string		UTF-8 string
 * @param	string		Mode: 'case' (case folding) or 'ascii' (to-ASCII mapping)
 * @param	string		For mode 'case': key of the case table to use, e.g. 'toLower'
 * @return	string		The converted string; the input is returned unchanged for an unknown mode or if initUnicodeData() failed
 */
function utf8_char_mapping($str,$mode,$opt='') {
	if (!$this->initUnicodeData($mode)) return $str;

	switch($mode) {
		case 'case':
			$map =& $this->caseFolding['utf-8'][$opt];
		break;

		case 'ascii':
			$map =& $this->toASCII['utf-8'];
		break;

		default:
			return $str;
	}

	$str = (string)$str;
	$byteCount = strlen($str);
	$out = '';
	for ($i = 0; $i < $byteCount; $i++) {
		$c = ord($str[$i]);
		if (($c & 0xC0) == 0xC0) {
			// Multi-byte start byte: the number of leading 1 bits is the length of the sequence
			for ($bc=0; $c & 0x80; $c = $c << 1) { $bc++; }
			$mbc = substr($str,$i,$bc);
			$i += $bc-1;
		} else {
			// Single-byte character. A stray continuation byte is handled the same way,
			// so it is copied as it is instead of repeating the previous character
			$mbc = $str[$i];
		}

		// Characters without an entry in the table are copied unchanged
		$out .= isset($map[$mbc]) ? $map[$mbc] : $mbc;
	}

	return $out;
}
01998
01999
02000
02001
02002
02003
02004
02005
02006
02007
02008
02009
02010
02011
02012
02013
02014
02015
02016
02017
02018
02019
02020
02021
02022
02023
02024
02025
02026
02027
02028
02029
02030
02031
02032
02033
02034
02035
02036
02037
/**
 * Truncates a string in an EUC-style charset (or shift_jis) to at most $len bytes
 * without cutting a two-byte character in half.
 *
 * @param	string		String in the given charset
 * @param	integer		Maximum length in bytes
 * @param	string		Charset name; 'shift_jis' selects its own rule for detecting the first byte of a two-byte character
 * @return	string		The truncated string
 */
function euc_strtrunc($str,$len,$charset) {
	$str = (string)$str;

	// Nothing to cut if the string fits. Deciding this by length (instead of by where
	// the scan below ends) also covers a final two-byte character that straddles $len
	if (strlen($str) <= $len) return $str;

	$sjis = ($charset == 'shift_jis');
	for ($i=0; $i<$len; $i++) {
		$c = ord($str[$i]);
		// First byte of a two-byte character: skip its second byte
		if ($sjis) {
			if (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0)) $i++;
		} else {
			if ($c >= 0x80) $i++;
		}
	}

	if ($i>$len) {
		// The byte at $len-1 is the first half of a two-byte character: drop it
		return substr($str,0,$len-1);
	}
	return substr($str,0,$len);
}
02057
02058
02059
02060
02061
02062
02063
02064
02065
02066
02067
/**
 * Returns a portion of a string in an EUC-style charset, with $start and $len counted in characters.
 *
 * @param	string		String in the given charset
 * @param	integer		Start position (in characters)
 * @param	string		Charset name, passed on to euc_char2byte_pos()
 * @param	integer		Length (in characters); NULL means "up to the end of the string"
 * @return	string		The substring, or FALSE if euc_char2byte_pos() reports $start as outside the string
 */
function euc_substr($str,$start,$charset,$len=null) {
	$firstByte = $this->euc_char2byte_pos($str,$start,$charset);
	if ($firstByte === false) {
		return false;
	}

	$tail = substr($str,$firstByte);
	if ($len == null) {
		// No length given: everything from the start position on
		return $tail;
	}

	// A length reaching beyond the remaining string keeps all of it
	$cutByte = $this->euc_char2byte_pos($tail,$len,$charset);
	return ($cutByte === false) ? $tail : substr($tail,0,$cutByte);
}
02083
02084
02085
02086
02087
02088
02089
02090
02091
02092
/**
 * Counts the characters of a string in an EUC-style charset (or shift_jis).
 *
 * @param	string		String in the given charset
 * @param	string		Charset name; 'shift_jis' selects its own rule for detecting the first byte of a two-byte character
 * @return	integer		Number of characters (single bytes plus two-byte characters)
 */
function euc_strlen($str,$charset) {
	$str = (string)$str;
	$byteCount = strlen($str);
	$sjis = ($charset == 'shift_jis');
	$n = 0;
	for ($i = 0; $i < $byteCount; $i++) {
		$c = ord($str[$i]);
		// First byte of a two-byte character: skip its second byte
		if ($sjis) {
			if (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0)) $i++;
		} else {
			if ($c >= 0x80) $i++;
		}

		$n++;
	}

	return $n;
}
02110
02111
02112
02113
02114
02115
02116
02117
02118
02119
/**
 * Translates a character position into a byte position for an EUC-style charset (or shift_jis).
 *
 * @param	string		String in the given charset
 * @param	integer		Character position; negative values count from the end of the string
 * @param	string		Charset name; 'shift_jis' selects its own rule for detecting the first byte of a two-byte character
 * @return	integer		Byte position, or FALSE if the scan left the string before or exactly when abs($pos) characters were counted
 */
function euc_char2byte_pos($str,$pos,$charset) {
	$str = (string)$str;
	$byteCount = strlen($str);
	$sjis = ($charset == 'shift_jis');
	$n = 0;				// characters counted so far
	$p = abs($pos);		// characters to count

	// Scan forward from the first byte or backward from the last one
	if ($pos >= 0) {
		$i = 0;
		$d = 1;
	} else {
		$i = $byteCount-1;
		$d = -1;
	}

	// The bounds are checked explicitly: probing with a negative offset would wrap
	// round to the end of the string on PHP versions that support negative offsets
	for ( ; $i >= 0 && $i < $byteCount && $n<$p; $i+=$d) {
		$c = ord($str[$i]);
		// Byte of a two-byte character: step over the other byte as well
		if ($sjis) {
			if (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0)) $i+=$d;
		} else {
			if ($c >= 0x80) $i+=$d;
		}

		$n++;
	}
	if ($i < 0 || $i >= $byteCount) return false;

	// The backward scan stopped one byte in front of the last counted character
	if ($pos < 0) $i++;

	return $i;
}
02150
02151
02152
02153
02154
02155
02156
02157
02158
02159
02160
/**
 * Maps every character of a string in an EUC-style charset (or shift_jis) through
 * a case-folding or to-ASCII table of that charset.
 *
 * @param	string		String in the given charset
 * @param	string		Charset name; 'shift_jis' selects its own rule for detecting the first byte of a two-byte character
 * @param	string		Mode: 'case' (case folding) or 'ascii' (to-ASCII mapping)
 * @param	string		For mode 'case': key of the case table to use, e.g. 'toLower'
 * @return	string		The converted string; the input is returned unchanged for an unknown mode or if the table could not be initialised
 */
function euc_char_mapping($str,$charset,$mode,$opt='') {
	switch($mode) {
		case 'case':
			if (!$this->initCaseFolding($charset)) return $str;
			$map =& $this->caseFolding[$charset][$opt];
		break;

		case 'ascii':
			if (!$this->initToASCII($charset)) return $str;
			$map =& $this->toASCII[$charset];
		break;

		default:
			return $str;
	}

	$str = (string)$str;
	$byteCount = strlen($str);
	$sjis = ($charset == 'shift_jis');
	$out = '';
	for ($i = 0; $i < $byteCount; $i++) {
		$mbc = $str[$i];
		$c = ord($mbc);

		// First byte of a two-byte character: take both bytes as one unit
		if ($sjis) {
			$isFirstOfTwo = (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0));
		} else {
			$isFirstOfTwo = ($c >= 0x80);
		}
		if ($isFirstOfTwo) {
			$mbc = substr($str,$i,2);
			$i++;
		}

		// Characters without an entry in the table are copied unchanged
		$out .= isset($map[$mbc]) ? $map[$mbc] : $mbc;
	}

	return $out;
}
02205
02206 }
02207
// Load an XCLASS extension of this class if one is registered for the current TYPO3_MODE.
// !empty() keeps the check silent when no entry exists.
if (defined('TYPO3_MODE') && !empty($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_cs.php'])) {
	include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_cs.php']);
}
02211
02212 ?>