|
TYPO3 API
SVNRelease
|
<?php

/*
 * This file is part of SwiftMailer.
 * (c) 2004-2009 Chris Corbyn
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

//@require 'Swift/CharacterReader.php';

/**
 * Analyzes UTF-8 characters.
 *
 * Character boundaries are derived from two pre-computed lookup tables that
 * map a lead byte to the total length (in bytes) of the sequence it starts.
 * A length of 0 means the byte cannot start a character.
 *
 * Note: the tables accept 5- and 6-byte lead bytes (0xF8-0xFD) as well as
 * 0xC0/0xC1, which RFC 3629 no longer permits; this is kept as-is.
 *
 * @package Swift
 * @subpackage Encoder
 * @author Chris Corbyn
 * @author Xavier De Cock <xdecock@gmail.com>
 */
class Swift_CharacterReader_Utf8Reader
  implements Swift_CharacterReader
{

  /**
   * Sequence length per lead byte, indexed by the integer byte value (0-255).
   * Pre-computed for optimization; used by validateByteSequence().
   *
   * @var int[]
   */
  private static $length_map=array(
    //N=0,1,2,3,4,5,6,7,8,9,A,B,C,D,E,F,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, //0x0N
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, //0x1N
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, //0x2N
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, //0x3N
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, //0x4N
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, //0x5N
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, //0x6N
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, //0x7N
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, //0x8N
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, //0x9N
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, //0xAN
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, //0xBN
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, //0xCN
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, //0xDN
    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, //0xEN
    4,4,4,4,4,4,4,4,5,5,5,5,6,6,0,0  //0xFN
  );

  /**
   * Same data as $length_map, but keyed by the single-byte string itself so
   * getCharPositions() can look up $string[$i] without calling ord().
   *
   * @var int[]
   */
  private static $s_length_map=array(
    "\x00"=>1, "\x01"=>1, "\x02"=>1, "\x03"=>1, "\x04"=>1, "\x05"=>1, "\x06"=>1, "\x07"=>1,
    "\x08"=>1, "\x09"=>1, "\x0a"=>1, "\x0b"=>1, "\x0c"=>1, "\x0d"=>1, "\x0e"=>1, "\x0f"=>1,
    "\x10"=>1, "\x11"=>1, "\x12"=>1, "\x13"=>1, "\x14"=>1, "\x15"=>1, "\x16"=>1, "\x17"=>1,
    "\x18"=>1, "\x19"=>1, "\x1a"=>1, "\x1b"=>1, "\x1c"=>1, "\x1d"=>1, "\x1e"=>1, "\x1f"=>1,
    "\x20"=>1, "\x21"=>1, "\x22"=>1, "\x23"=>1, "\x24"=>1, "\x25"=>1, "\x26"=>1, "\x27"=>1,
    "\x28"=>1, "\x29"=>1, "\x2a"=>1, "\x2b"=>1, "\x2c"=>1, "\x2d"=>1, "\x2e"=>1, "\x2f"=>1,
    "\x30"=>1, "\x31"=>1, "\x32"=>1, "\x33"=>1, "\x34"=>1, "\x35"=>1, "\x36"=>1, "\x37"=>1,
    "\x38"=>1, "\x39"=>1, "\x3a"=>1, "\x3b"=>1, "\x3c"=>1, "\x3d"=>1, "\x3e"=>1, "\x3f"=>1,
    "\x40"=>1, "\x41"=>1, "\x42"=>1, "\x43"=>1, "\x44"=>1, "\x45"=>1, "\x46"=>1, "\x47"=>1,
    "\x48"=>1, "\x49"=>1, "\x4a"=>1, "\x4b"=>1, "\x4c"=>1, "\x4d"=>1, "\x4e"=>1, "\x4f"=>1,
    "\x50"=>1, "\x51"=>1, "\x52"=>1, "\x53"=>1, "\x54"=>1, "\x55"=>1, "\x56"=>1, "\x57"=>1,
    "\x58"=>1, "\x59"=>1, "\x5a"=>1, "\x5b"=>1, "\x5c"=>1, "\x5d"=>1, "\x5e"=>1, "\x5f"=>1,
    "\x60"=>1, "\x61"=>1, "\x62"=>1, "\x63"=>1, "\x64"=>1, "\x65"=>1, "\x66"=>1, "\x67"=>1,
    "\x68"=>1, "\x69"=>1, "\x6a"=>1, "\x6b"=>1, "\x6c"=>1, "\x6d"=>1, "\x6e"=>1, "\x6f"=>1,
    "\x70"=>1, "\x71"=>1, "\x72"=>1, "\x73"=>1, "\x74"=>1, "\x75"=>1, "\x76"=>1, "\x77"=>1,
    "\x78"=>1, "\x79"=>1, "\x7a"=>1, "\x7b"=>1, "\x7c"=>1, "\x7d"=>1, "\x7e"=>1, "\x7f"=>1,
    "\x80"=>0, "\x81"=>0, "\x82"=>0, "\x83"=>0, "\x84"=>0, "\x85"=>0, "\x86"=>0, "\x87"=>0,
    "\x88"=>0, "\x89"=>0, "\x8a"=>0, "\x8b"=>0, "\x8c"=>0, "\x8d"=>0, "\x8e"=>0, "\x8f"=>0,
    "\x90"=>0, "\x91"=>0, "\x92"=>0, "\x93"=>0, "\x94"=>0, "\x95"=>0, "\x96"=>0, "\x97"=>0,
    "\x98"=>0, "\x99"=>0, "\x9a"=>0, "\x9b"=>0, "\x9c"=>0, "\x9d"=>0, "\x9e"=>0, "\x9f"=>0,
    "\xa0"=>0, "\xa1"=>0, "\xa2"=>0, "\xa3"=>0, "\xa4"=>0, "\xa5"=>0, "\xa6"=>0, "\xa7"=>0,
    "\xa8"=>0, "\xa9"=>0, "\xaa"=>0, "\xab"=>0, "\xac"=>0, "\xad"=>0, "\xae"=>0, "\xaf"=>0,
    "\xb0"=>0, "\xb1"=>0, "\xb2"=>0, "\xb3"=>0, "\xb4"=>0, "\xb5"=>0, "\xb6"=>0, "\xb7"=>0,
    "\xb8"=>0, "\xb9"=>0, "\xba"=>0, "\xbb"=>0, "\xbc"=>0, "\xbd"=>0, "\xbe"=>0, "\xbf"=>0,
    "\xc0"=>2, "\xc1"=>2, "\xc2"=>2, "\xc3"=>2, "\xc4"=>2, "\xc5"=>2, "\xc6"=>2, "\xc7"=>2,
    "\xc8"=>2, "\xc9"=>2, "\xca"=>2, "\xcb"=>2, "\xcc"=>2, "\xcd"=>2, "\xce"=>2, "\xcf"=>2,
    "\xd0"=>2, "\xd1"=>2, "\xd2"=>2, "\xd3"=>2, "\xd4"=>2, "\xd5"=>2, "\xd6"=>2, "\xd7"=>2,
    "\xd8"=>2, "\xd9"=>2, "\xda"=>2, "\xdb"=>2, "\xdc"=>2, "\xdd"=>2, "\xde"=>2, "\xdf"=>2,
    "\xe0"=>3, "\xe1"=>3, "\xe2"=>3, "\xe3"=>3, "\xe4"=>3, "\xe5"=>3, "\xe6"=>3, "\xe7"=>3,
    "\xe8"=>3, "\xe9"=>3, "\xea"=>3, "\xeb"=>3, "\xec"=>3, "\xed"=>3, "\xee"=>3, "\xef"=>3,
    "\xf0"=>4, "\xf1"=>4, "\xf2"=>4, "\xf3"=>4, "\xf4"=>4, "\xf5"=>4, "\xf6"=>4, "\xf7"=>4,
    "\xf8"=>5, "\xf9"=>5, "\xfa"=>5, "\xfb"=>5, "\xfc"=>6, "\xfd"=>6, "\xfe"=>0, "\xff"=>0,
  );

  /**
   * Scans $string and appends the end position of every character found to
   * the character map.
   *
   * For each complete character, $currentMap['p'][] receives the byte offset
   * just past its last byte ($startOffset + index + length). A run of bytes
   * that cannot form a character is recorded as a single entry whose 'p'
   * value is the offset at which the next lead byte was found and whose 'i'
   * flag is set to true.
   *
   * If the string ends in the middle of a multi-byte sequence, the remaining
   * bytes are handed back through $ignoredChars and scanning stops.
   * $ignoredChars is left untouched otherwise. An invalid run that is still
   * open when the string ends is not recorded.
   *
   * @param string $string       Raw bytes to analyse
   * @param int    $startOffset  Offset added to every recorded position
   * @param array  $currentMap   Map with keys 'p' (positions) and 'i'
   *                             (invalid flags); both are initialised to
   *                             empty arrays if either one is missing
   * @param mixed  $ignoredChars Receives the trailing incomplete sequence
   * @return int Number of map entries added by this call
   */
  public function getCharPositions($string, $startOffset, &$currentMap, &$ignoredChars)
  {
    if (!isset($currentMap['i']) || !isset($currentMap['p']))
    {
      $currentMap['p'] = $currentMap['i'] = array();
    }
    $strlen = strlen($string);
    $charPos = count($currentMap['p']);
    $foundChars = 0;
    $invalid = false;
    for ($i = 0; $i < $strlen; ++$i)
    {
      $char = $string[$i];
      $size = self::$s_length_map[$char];
      if ($size === 0)
      {
        /* byte cannot start a char, we must wait for a resync */
        $invalid = true;
        continue;
      }
      if ($invalid)
      {
        /* We mark the skipped bytes as one invalid char and start a new char */
        $currentMap['p'][$charPos + $foundChars] = $startOffset + $i;
        $currentMap['i'][$charPos + $foundChars] = true;
        ++$foundChars;
        $invalid = false;
      }
      if (($i + $size) > $strlen)
      {
        /* Sequence is cut off by the end of the string: hand it back */
        $ignoredChars = substr($string, $i);
        break;
      }
      for ($j = 1; $j < $size; ++$j)
      {
        $char = $string[$i + $j];
        if (!($char > "\x7F" && $char < "\xC0"))
        {
          /*
           * Not a continuation byte (0x80-0xBF): the sequence is invalid.
           * Resume the outer loop at the byte after the lead byte so the
           * offending byte gets re-examined as a potential lead byte.
           */
          $invalid = true;
          continue 2;
        }
      }
      /* Ok we got a complete char here */
      $currentMap['p'][$charPos + $foundChars] = $startOffset + $i + $size;
      /* Skip the continuation bytes; the loop's ++$i moves past the last one */
      $i += $j - 1;
      ++$foundChars;
    }
    return $foundChars;
  }

  /**
   * Returns the map type used by this reader.
   *
   * @return int The MAP_TYPE_POSITIONS constant (resolved through self::,
   *             presumably declared on Swift_CharacterReader)
   */
  public function getMapType()
  {
    return self::MAP_TYPE_POSITIONS;
  }

  /**
   * Returns an integer which specifies how many more bytes to read.
   * A positive integer indicates the number of more bytes to fetch before invoking
   * this method again.
   * A value of zero means this is already a valid character.
   * A value of -1 means this cannot possibly be a valid character.
   *
   * Only the first byte is inspected; its expected sequence length is looked
   * up in $length_map, which is indexed by integer byte value. $bytes[0]
   * therefore has to be an integer in 0-255; anything else (including an
   * empty $bytes) yields -1.
   *
   * @param int[] $bytes Byte values read so far
   * @param int   $size  Number of bytes read so far
   * @return int
   */
  public function validateByteSequence($bytes, $size)
  {
    if ($size < 1)
    {
      return -1;
    }
    /* No first byte, or not a known byte value: cannot be a valid char */
    if (!isset($bytes[0], self::$length_map[$bytes[0]]))
    {
      return -1;
    }
    $needed = self::$length_map[$bytes[0]] - $size;
    return ($needed > -1)
      ? $needed
      : -1
      ;
  }

  /**
   * Returns the number of bytes which should be read to start each character.
   * @return int
   */
  public function getInitialByteSize()
  {
    return 1;
  }

}
1.8.0