|
TYPO3 API
SVNRelease
|
Public Member Functions | |
| parse_charset ($charset) | |
| get_locale_charset ($locale) | |
| conv ($str, $fromCS, $toCS, $useEntityForNoChar=0) | |
| convArray (&$array, $fromCS, $toCS, $useEntityForNoChar=0) | |
| utf8_encode ($str, $charset) | |
| utf8_decode ($str, $charset, $useEntityForNoChar=0) | |
| utf8_to_entities ($str) | |
| entities_to_utf8 ($str, $alsoStdHtmlEnt=0) | |
| utf8_to_numberarray ($str, $convEntities=0, $retChar=0) | |
| UnumberToChar ($cbyte) | |
| utf8CharToUnumber ($str, $hex=0) | |
| initCharset ($charset) | |
| initUnicodeData ($mode=NULL) | |
| initCaseFolding ($charset) | |
| initToASCII ($charset) | |
| substr ($charset, $string, $start, $len=NULL) | |
| strlen ($charset, $string) | |
| crop ($charset, $string, $len, $crop= '') | |
| strtrunc ($charset, $string, $len) | |
| conv_case ($charset, $string, $case) | |
| specCharsToASCII ($charset, $string) | |
| getPreferredClientLanguage ($languageCodesList) | |
| sb_char_mapping ($str, $charset, $mode, $opt= '') | |
| utf8_substr ($str, $start, $len=NULL) | |
| utf8_strlen ($str) | |
| utf8_strtrunc ($str, $len) | |
| utf8_strpos ($haystack, $needle, $offset=0) | |
| utf8_strrpos ($haystack, $needle) | |
| utf8_char2byte_pos ($str, $pos) | |
| utf8_byte2char_pos ($str, $pos) | |
| utf8_char_mapping ($str, $mode, $opt= '') | |
| euc_strtrunc ($str, $len, $charset) | |
| euc_substr ($str, $start, $charset, $len=NULL) | |
| euc_strlen ($str, $charset) | |
| euc_char2byte_pos ($str, $pos, $charset) | |
| euc_char_mapping ($str, $charset, $mode, $opt= '') | |
Public Attributes | |
| $noCharByteVal = 63 | |
| $parsedCharsets = array() | |
| $caseFolding = array() | |
| $toASCII = array() | |
| $twoByteSets | |
| $fourByteSets | |
| $eucBasedSets | |
| $synonyms | |
| $lang_to_script | |
| $script_to_charset_unix | |
| $script_to_charset_windows | |
| $locale_to_charset | |
| $charSetArray | |
| $isoArray | |
Protected Member Functions | |
| cropMbstring ($charset, $string, $len, $crop= '') | |
Definition at line 129 of file class.t3lib_cs.php.
| t3lib_cs::conv | ( | $ | str, |
| $ | fromCS, | ||
| $ | toCS, | ||
| $ | useEntityForNoChar = 0 |
||
| ) |
Convert from one charset to another charset.
| string | Input string |
| string | From charset (the current charset of the string) |
| string | To charset (the output charset wanted) |
| boolean | If set, then characters that are not available in the destination character set will be encoded as numeric entities |
Definition at line 625 of file class.t3lib_cs.php.
References $GLOBALS, utf8_decode(), and utf8_encode().
Referenced by convArray().
| t3lib_cs::conv_case | ( | $ | charset, |
| $ | string, | ||
| $ | case | ||
| ) |
Translates all characters of a string into their respective case values. Unlike strtolower() and strtoupper(), this method is locale independent. Note that the string length may change, e.g. lower case German "ß" (sharp S) becomes upper case "SS". Unit-tested by Kasper. Real case folding is language dependent; this method ignores this fact.
| string | Character set of string |
| string | Input string to convert case for |
| string | Case keyword: "toLower" means lowercase conversion, anything else is uppercase (use "toUpper" ) |
Definition at line 1686 of file class.t3lib_cs.php.
References $GLOBALS, euc_char_mapping(), sb_char_mapping(), and utf8_char_mapping().
| t3lib_cs::convArray | ( | &$ | array, |
| $ | fromCS, | ||
| $ | toCS, | ||
| $ | useEntityForNoChar = 0 |
||
| ) |
Convert all elements in ARRAY with type string from one charset to another charset. NOTICE: Array is passed by reference!
| array | Input array, possibly multidimensional |
| string | From charset (the current charset of the string) |
| string | To charset (the output charset wanted) |
| boolean | If set, then characters that are not available in the destination character set will be encoded as numeric entities |
Definition at line 677 of file class.t3lib_cs.php.
| t3lib_cs::crop | ( | $ | charset, |
| $ | string, | ||
| $ | len, | ||
| $ | crop = '' |
||
| ) |
Truncates a string and pre-/appends a string. Unit tested by Kasper
| string | The character set |
| string | Character string |
| integer | Length (in characters) |
| string | Crop signifier |
Definition at line 1587 of file class.t3lib_cs.php.
References $GLOBALS, cropMbstring(), euc_char2byte_pos(), strlen(), substr(), and utf8_char2byte_pos().
| t3lib_cs::cropMbstring | ( | $ | charset, |
| $ | string, | ||
| $ | len, | ||
| $ | crop = '' |
||
| ) | [protected] |
Method to crop strings using the mb_substr function.
| string | The character set |
| string | String to be cropped |
| integer | Crop length (in characters) |
| string | Crop signifier |
Definition at line 1561 of file class.t3lib_cs.php.
Referenced by crop().
| t3lib_cs::entities_to_utf8 | ( | $ | str, |
| $ | alsoStdHtmlEnt = 0 |
||
| ) |
Converts numeric entities (UNICODE, e.g. decimal (&amp;#1234;) or hexadecimal (&amp;#x1b;)) to UTF-8 multibyte chars.
| string | Input string, UTF-8 |
| boolean | If set, then all string-HTML entities (like &amp;amp; or &amp;pound;) will be converted as well |
Definition at line 844 of file class.t3lib_cs.php.
References substr(), UnumberToChar(), and utf8_encode().
Referenced by utf8_to_numberarray().
| t3lib_cs::euc_char2byte_pos | ( | $ | str, |
| $ | pos, | ||
| $ | charset | ||
| ) |
Translates a character position into an 'absolute' byte position.
| string | EUC multibyte character string |
| integer | character position (negative values start from the end) |
| string | the charset |
Definition at line 2244 of file class.t3lib_cs.php.
References strlen().
Referenced by crop(), and euc_substr().
| t3lib_cs::euc_char_mapping | ( | $ | str, |
| $ | charset, | ||
| $ | mode, | ||
| $ | opt = '' |
||
| ) |
Maps all characters of a string in the EUC charset family.
| string | EUC multibyte character string |
| string | the charset |
| string | mode: 'case' (case folding) or 'ascii' (ASCII transliteration) |
| string | 'case': conversion 'toLower' or 'toUpper' |
Definition at line 2293 of file class.t3lib_cs.php.
References initCaseFolding(), initToASCII(), strlen(), and substr().
Referenced by conv_case(), and specCharsToASCII().
| t3lib_cs::euc_strlen | ( | $ | str, |
| $ | charset | ||
| ) |
Counts the number of characters of a string in the EUC charset family.
| string | EUC multibyte character string |
| string | the charset |
Definition at line 2213 of file class.t3lib_cs.php.
References strlen().
Referenced by strlen().
| t3lib_cs::euc_strtrunc | ( | $ | str, |
| $ | len, | ||
| $ | charset | ||
| ) |
Cuts a string in the EUC charset family short at a given byte length.
| string | EUC multibyte character string |
| integer | the byte length |
| string | the charset |
Definition at line 2144 of file class.t3lib_cs.php.
References strlen(), and substr().
Referenced by strtrunc().
| t3lib_cs::euc_substr | ( | $ | str, |
| $ | start, | ||
| $ | charset, | ||
| $ | len = NULL |
||
| ) |
Returns a part of a string in the EUC charset family.
| string | EUC multibyte character string |
| integer | start position (character position) |
| string | the charset |
| integer | length (in characters) |
Definition at line 2180 of file class.t3lib_cs.php.
References euc_char2byte_pos(), and substr().
Referenced by substr().
| t3lib_cs::get_locale_charset | ( | $ | locale | ) |
Get the charset of a locale.
The locale string may take one of the following forms:
ln = language
ln_CN = language / country
ln_CN.cs = language / country / charset
ln_CN.cs@mod = language / country / charset / modifier
| string | Locale string |
Definition at line 571 of file class.t3lib_cs.php.
References parse_charset().
| t3lib_cs::getPreferredClientLanguage | ( | $ | languageCodesList | ) |
Converts the language codes that we get from the client (usually HTTP_ACCEPT_LANGUAGE) into a TYPO3-readable language code.
| $languageCodesList | List of language codes, something like 'de,en-us;q=0.9,de-de;q=0.7,es-cl;q=0.6,en;q=0.4,es;q=0.3,zh;q=0.1'; see http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.4 |
Definition at line 1734 of file class.t3lib_cs.php.
References t3lib_div\trimExplode().
| t3lib_cs::initCaseFolding | ( | $ | charset | ) |
This function initializes the folding table for a charset other than UTF-8. This function is automatically called by the case folding functions.
| string | Charset for which to initialize case folding. |
Definition at line 1349 of file class.t3lib_cs.php.
References t3lib_div\getFileAbsFileName(), initCharset(), initUnicodeData(), utf8_decode(), and t3lib_div\writeFileToTypo3tempDir().
Referenced by euc_char_mapping(), and sb_char_mapping().
| t3lib_cs::initCharset | ( | $ | charset | ) |
This will initialize a charset for use if it is defined in the PATH_t3lib.'csconvtbl/' folder. This function is automatically called by the conversion functions.
PLEASE SEE: http://www.unicode.org/Public/MAPPINGS/
| string | The charset to be initialized. Always use a lowercase charset (the charset must match exactly with a filename in the csconvtbl/ folder: [charset].tbl) |
Definition at line 1029 of file class.t3lib_cs.php.
References $value, t3lib_div\getFileAbsFileName(), substr(), t3lib_div\trimExplode(), UnumberToChar(), t3lib_div\validPathStr(), and t3lib_div\writeFileToTypo3tempDir().
Referenced by initCaseFolding(), initToASCII(), utf8_decode(), and utf8_encode().
| t3lib_cs::initToASCII | ( | $ | charset | ) |
This function initializes the to-ASCII conversion table for a charset other than UTF-8. This function is automatically called by the ASCII transliteration functions.
| string | Charset for which to initialize conversion. |
Definition at line 1419 of file class.t3lib_cs.php.
References t3lib_div\getFileAbsFileName(), initCharset(), initUnicodeData(), utf8_decode(), and t3lib_div\writeFileToTypo3tempDir().
Referenced by euc_char_mapping(), and sb_char_mapping().
| t3lib_cs::initUnicodeData | ( | $ | mode = NULL | ) |
This function initializes all UTF-8 character data tables.
PLEASE SEE: http://www.unicode.org/Public/UNIDATA/
| string | Mode ("case", "ascii", ...) |
Definition at line 1097 of file class.t3lib_cs.php.
References t3lib_div\getFileAbsFileName(), t3lib_div\trimExplode(), UnumberToChar(), t3lib_div\validPathStr(), and t3lib_div\writeFileToTypo3tempDir().
Referenced by initCaseFolding(), initToASCII(), and utf8_char_mapping().
| t3lib_cs::parse_charset | ( | $ | charset | ) |
Normalize - changes input character set to lowercase letters.
| string | Input charset |
Definition at line 550 of file class.t3lib_cs.php.
Referenced by get_locale_charset().
| t3lib_cs::sb_char_mapping | ( | $ | str, |
| $ | charset, | ||
| $ | mode, | ||
| $ | opt = '' |
||
| ) |
Maps all characters of a string in a single byte charset.
| string | the string |
| string | the charset |
| string | mode: 'case' (case folding) or 'ascii' (ASCII transliteration) |
| string | 'case': conversion 'toLower' or 'toUpper' |
Definition at line 1804 of file class.t3lib_cs.php.
References initCaseFolding(), initToASCII(), and strlen().
Referenced by conv_case(), and specCharsToASCII().
| t3lib_cs::specCharsToASCII | ( | $ | charset, |
| $ | string | ||
| ) |
Converts special chars (like æøåÆØÅ, umlauts etc) to ascii equivalents (usually double-bytes, like æ => ae etc.)
| string | Character set of string |
| string | Input string to convert |
Definition at line 1712 of file class.t3lib_cs.php.
References euc_char_mapping(), sb_char_mapping(), and utf8_char_mapping().
| t3lib_cs::strlen | ( | $ | charset, |
| $ | string | ||
| ) |
Counts the number of characters. Unit-tested by Kasper (single byte charsets only)
| string | The character set |
| string | Character string |
Definition at line 1533 of file class.t3lib_cs.php.
References $GLOBALS, euc_strlen(), and utf8_strlen().
Referenced by crop(), euc_char2byte_pos(), euc_char_mapping(), euc_strlen(), euc_strtrunc(), sb_char_mapping(), utf8_byte2char_pos(), utf8_char2byte_pos(), utf8_char_mapping(), utf8_decode(), utf8_encode(), utf8_strlen(), utf8_to_entities(), and utf8_to_numberarray().
| t3lib_cs::strtrunc | ( | $ | charset, |
| $ | string, | ||
| $ | len | ||
| ) |
Cuts a string short at a given byte length.
| string | The character set |
| string | Character string |
| integer | The byte length |
Definition at line 1648 of file class.t3lib_cs.php.
References $GLOBALS, euc_strtrunc(), substr(), and utf8_strtrunc().
| t3lib_cs::substr | ( | $ | charset, |
| $ | string, | ||
| $ | start, | ||
| $ | len = NULL |
||
| ) |
Returns a part of a string. Unit-tested by Kasper (single byte charsets only)
| string | The character set |
| string | Character string |
| integer | Start position (character position) |
| integer | Length (in characters) |
Definition at line 1478 of file class.t3lib_cs.php.
References $GLOBALS, euc_substr(), and utf8_substr().
Referenced by crop(), entities_to_utf8(), euc_char_mapping(), euc_strtrunc(), euc_substr(), initCharset(), strtrunc(), utf8_char_mapping(), utf8_decode(), utf8_encode(), utf8_strtrunc(), utf8_substr(), utf8_to_entities(), utf8_to_numberarray(), and utf8CharToUnumber().
| t3lib_cs::UnumberToChar | ( | $ | cbyte | ) |
Converts a UNICODE number to a UTF-8 multibyte character. Algorithm based on a script found at http://czyborra.com/utf/. Unit-tested by Kasper.
The binary representation of the character's integer value is thus simply spread across the bytes and the number of high bits set in the lead byte announces the number of bytes in the multibyte sequence:
bytes | bits | representation
1 | 7 | 0vvvvvvv
2 | 11 | 110vvvvv 10vvvvvv
3 | 16 | 1110vvvv 10vvvvvv 10vvvvvv
4 | 21 | 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv
5 | 26 | 111110vv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv
6 | 31 | 1111110v 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv
| integer | UNICODE integer |
Definition at line 934 of file class.t3lib_cs.php.
Referenced by entities_to_utf8(), initCharset(), and initUnicodeData().
| t3lib_cs::utf8_byte2char_pos | ( | $ | str, |
| $ | pos | ||
| ) |
Translates an 'absolute' byte position into a character position. Unit tested by Kasper.
| string | UTF-8 string |
| integer | byte position |
Definition at line 2049 of file class.t3lib_cs.php.
References strlen().
Referenced by utf8_strpos(), and utf8_strrpos().
| t3lib_cs::utf8_char2byte_pos | ( | $ | str, |
| $ | pos | ||
| ) |
Translates a character position into an 'absolute' byte position. Unit tested by Kasper.
| string | UTF-8 string |
| integer | Character position (negative values start from the end) |
Definition at line 2000 of file class.t3lib_cs.php.
References strlen().
Referenced by crop(), utf8_strpos(), and utf8_substr().
| t3lib_cs::utf8_char_mapping | ( | $ | str, |
| $ | mode, | ||
| $ | opt = '' |
||
| ) |
Maps all characters of an UTF-8 string.
| string | UTF-8 string |
| string | mode: 'case' (case folding) or 'ascii' (ASCII transliteration) |
| string | 'case': conversion 'toLower' or 'toUpper' |
Definition at line 2078 of file class.t3lib_cs.php.
References initUnicodeData(), strlen(), and substr().
Referenced by conv_case(), and specCharsToASCII().
| t3lib_cs::utf8_decode | ( | $ | str, |
| $ | charset, | ||
| $ | useEntityForNoChar = 0 |
||
| ) |
Converts $str from UTF-8 to $charset
| string | String in UTF-8 to convert to local charset |
| string | Charset, lowercase. Must be found in csconvtbl/ folder. |
| boolean | If set, then characters that are not available in the destination character set will be encoded as numeric entities |
Definition at line 748 of file class.t3lib_cs.php.
References initCharset(), strlen(), substr(), and utf8CharToUnumber().
Referenced by conv(), initCaseFolding(), and initToASCII().
| t3lib_cs::utf8_encode | ( | $ | str, |
| $ | charset | ||
| ) |
Converts $str from $charset to UTF-8
| string | String in local charset to convert to UTF-8 |
| string | Charset, lowercase. Must be found in csconvtbl/ folder. |
Definition at line 694 of file class.t3lib_cs.php.
References initCharset(), strlen(), and substr().
Referenced by conv(), and entities_to_utf8().
| t3lib_cs::utf8_strlen | ( | $ | str | ) |
Counts the number of characters of a string in UTF-8. Unit-tested by Kasper and works 100% like strlen() / mb_strlen()
| string | UTF-8 multibyte character string |
Definition at line 1896 of file class.t3lib_cs.php.
References strlen().
Referenced by strlen().
| t3lib_cs::utf8_strpos | ( | $ | haystack, |
| $ | needle, | ||
| $ | offset = 0 |
||
| ) |
Find position of first occurrence of a string, both arguments are in UTF-8.
| string | UTF-8 string to search in |
| string | UTF-8 string to search for |
| integer | Position to start the search |
Definition at line 1947 of file class.t3lib_cs.php.
References $GLOBALS, utf8_byte2char_pos(), and utf8_char2byte_pos().
| t3lib_cs::utf8_strrpos | ( | $ | haystack, |
| $ | needle | ||
| ) |
Find position of last occurrence of a char in a string, both arguments are in UTF-8.
| string | UTF-8 string to search in |
| string | UTF-8 character to search for (single character) |
Definition at line 1976 of file class.t3lib_cs.php.
References $GLOBALS, and utf8_byte2char_pos().
| t3lib_cs::utf8_strtrunc | ( | $ | str, |
| $ | len | ||
| ) |
Truncates a string in UTF-8 short at a given byte length.
| string | UTF-8 multibyte character string |
| integer | the byte length |
Definition at line 1921 of file class.t3lib_cs.php.
References substr().
Referenced by strtrunc().
| t3lib_cs::utf8_substr | ( | $ | str, |
| $ | start, | ||
| $ | len = NULL |
||
| ) |
Returns a part of a UTF-8 string. Unit-tested by Kasper and works 100% like substr() / mb_substr() for full range of $start/$len
| string | UTF-8 string |
| integer | Start position (character position) |
| integer | Length (in characters) |
Definition at line 1855 of file class.t3lib_cs.php.
References substr(), and utf8_char2byte_pos().
Referenced by substr().
| t3lib_cs::utf8_to_entities | ( | $ | str | ) |
Converts all chars > 127 to numeric entities.
| string | Input string |
Definition at line 805 of file class.t3lib_cs.php.
References strlen(), substr(), and utf8CharToUnumber().
| t3lib_cs::utf8_to_numberarray | ( | $ | str, |
| $ | convEntities = 0, |
||
| $ | retChar = 0 |
||
| ) |
Converts all chars in the input UTF-8 string into integer numbers returned in an array
| string | Input string, UTF-8 |
| boolean | If set, then all HTML entities (like &amp;amp; or &amp;pound; or &amp;#123; or &amp;#x3f5d;) will be detected as characters. |
| boolean | If set, then instead of integer numbers the real UTF-8 char is returned. |
Definition at line 878 of file class.t3lib_cs.php.
References entities_to_utf8(), strlen(), substr(), and utf8CharToUnumber().
| t3lib_cs::utf8CharToUnumber | ( | $ | str, |
| $ | hex = 0 |
||
| ) |
Converts a UTF-8 multibyte character to a UNICODE number. Unit-tested by Kasper.
| string | UTF-8 multibyte character string |
| boolean | If set, then a hex. number is returned. |
Definition at line 989 of file class.t3lib_cs.php.
References substr().
Referenced by utf8_decode(), utf8_to_entities(), and utf8_to_numberarray().
| t3lib_cs::$caseFolding = array() |
Definition at line 136 of file class.t3lib_cs.php.
| t3lib_cs::$charSetArray |
Definition at line 468 of file class.t3lib_cs.php.
| t3lib_cs::$eucBasedSets |
array(
'gb2312' => 1,
'big5' => 1,
'euc-kr' => 1,
'shift_jis' => 1,
)
Definition at line 153 of file class.t3lib_cs.php.
| t3lib_cs::$fourByteSets |
array(
'ucs-4' => 1,
'utf-32' => 1,
)
Definition at line 147 of file class.t3lib_cs.php.
| t3lib_cs::$isoArray |
array(
'ba' => 'bs',
'br' => 'pt_BR',
'ch' => 'zh_CN',
'cz' => 'cs',
'dk' => 'da',
'si' => 'sl',
'se' => 'sv',
'gl' => 'kl',
'gr' => 'el',
'hk' => 'zh_HK',
'kr' => 'ko',
'ua' => 'uk',
'jp' => 'ja',
'qc' => 'fr_CA',
'vn' => 'vi',
'ge' => 'ka',
'ga' => 'gl',
)
Definition at line 523 of file class.t3lib_cs.php.
| t3lib_cs::$lang_to_script |
Definition at line 249 of file class.t3lib_cs.php.
| t3lib_cs::$locale_to_charset |
array(
'japanese.euc' => 'euc-jp',
'ja_jp.ujis' => 'euc-jp',
'korean.euc' => 'euc-kr',
'sr@Latn' => 'iso-8859-2',
'zh_cn' => 'gb2312',
'zh_hk' => 'big5',
'zh_tw' => 'big5',
)
Definition at line 456 of file class.t3lib_cs.php.
| t3lib_cs::$noCharByteVal = 63 |
Definition at line 130 of file class.t3lib_cs.php.
| t3lib_cs::$parsedCharsets = array() |
Definition at line 133 of file class.t3lib_cs.php.
| t3lib_cs::$script_to_charset_unix |
array(
'west_european' => 'iso-8859-1',
'estonian' => 'iso-8859-1',
'east_european' => 'iso-8859-2',
'baltic' => 'iso-8859-4',
'cyrillic' => 'iso-8859-5',
'arabic' => 'iso-8859-6',
'greek' => 'iso-8859-7',
'hebrew' => 'iso-8859-8',
'turkish' => 'iso-8859-9',
'thai' => 'iso-8859-11',
'lithuanian' => 'iso-8859-13',
'chinese' => 'gb2312',
'japanese' => 'euc-jp',
'korean' => 'euc-kr',
'simpl_chinese' => 'gb2312',
'trad_chinese' => 'big5',
'vietnamese' => '',
'unicode' => 'utf-8',
'albanian' => 'utf-8'
)
Definition at line 410 of file class.t3lib_cs.php.
| t3lib_cs::$script_to_charset_windows |
array(
'east_european' => 'windows-1250',
'cyrillic' => 'windows-1251',
'west_european' => 'windows-1252',
'greek' => 'windows-1253',
'turkish' => 'windows-1254',
'hebrew' => 'windows-1255',
'arabic' => 'windows-1256',
'baltic' => 'windows-1257',
'estonian' => 'windows-1257',
'lithuanian' => 'windows-1257',
'vietnamese' => 'windows-1258',
'thai' => 'cp874',
'korean' => 'cp949',
'chinese' => 'gb2312',
'japanese' => 'shift_jis',
'simpl_chinese' => 'gb2312',
'trad_chinese' => 'big5',
'albanian' => 'windows-1250',
'unicode' => 'utf-8'
)
Definition at line 433 of file class.t3lib_cs.php.
| t3lib_cs::$synonyms |
Definition at line 162 of file class.t3lib_cs.php.
| t3lib_cs::$toASCII = array() |
Definition at line 139 of file class.t3lib_cs.php.
| t3lib_cs::$twoByteSets |
array(
'ucs-2' => 1,
)
Definition at line 142 of file class.t3lib_cs.php.
1.7.5.1