|
TYPO3 API
SVNRelease
|
<?php
/**
 * Usage: Run *every* variable passed in through it.
 * The goal of this function is to be a generic function that can be used to
 * parse almost any input and render it XSS safe. For more information on
 * actual XSS attacks, check out http://ha.ckers.org/xss.html. Another
 * excellent site is the XSS Database which details each attack and how it
 * works.
 *
 * Used with permission by the author.
 * URL: http://quickwired.com/smallprojects/php_xss_filter_function.php
 *
 * Check XSS attacks on http://ha.ckers.org/xss.html
 *
 * License:
 * This code is public domain, you are free to do whatever you want with it,
 * including adding it to your own project which can be under any license.
 *
 * $Id: RemoveXSS.php 10547 2011-02-22 20:03:57Z lolli $
 *
 * @author Travis Puderbaugh <kallahar@quickwired.com>
 * @author Jigal van Hemert <jigal@xs4all.nl>
 * @package RemoveXSS
 */
final class RemoveXSS {

    /**
     * Removes potential XSS code from an input string.
     *
     * Using an external class by Travis Puderbaugh <kallahar@quickwired.com>
     *
     * The string is processed in three stages:
     *  1. control characters (except TAB, LF and CR) are stripped;
     *  2. numeric character references for printable ASCII characters are
     *     decoded, repeatedly, until none are left;
     *  3. every dangerous keyword (protocol, tag name or attribute name) that
     *     occurs in a dangerous position gets $replaceString inserted after
     *     its second character, which "nerfs" it (e.g. "ja<x>vascript:").
     *
     * @param string $val Input string
     * @param string $replaceString String inserted into keywords (which destroys the tags);
     *                              an empty value falls back to '<x>'
     * @return string Input string with potential XSS code removed
     */
    public static function process($val, $replaceString = '<x>') {
        // don't use empty $replaceString because then no XSS-remove will be done
        if ($replaceString == '') {
            $replaceString = '<x>';
        }

        // Remove all non-printable characters. TAB (0x09), LF (0x0a) and CR (0x0d) are allowed.
        // This prevents some character re-spacing such as <java\0script>.
        // Note that splits with \n, \r and \t are handled later since they *are* allowed in some inputs.
        // A single character class is required here: three consecutive classes would only match
        // a sequence of three control characters and let single ones pass.
        $val = preg_replace('/[\x00-\x08\x0b\x0c\x0e-\x1f]/', '', $val);

        // Straight replacements, the user should never need these since they're normal characters.
        // This turns e.g. <IMG SRC=&#106;avascript:alert('XSS')> into its plain form so the keyword
        // scan below can see it.
        // The negative lookaheads make sure only complete numeric references are decoded; without
        // them "&#8217;" would be misread as "&#82" followed by "17;".
        $searchHexEncodings = '/&#[xX]0{0,8}(21|22|23|24|25|26|27|28|29|2a|2b|2d|2f|30|31|32|33|34|35|36|37|38|39|3a|3b|3d|3f|40|41|42|43|44|45|46|47|48|49|4a|4b|4c|4d|4e|4f|50|51|52|53|54|55|56|57|58|59|5a|5b|5c|5d|5e|5f|60|61|62|63|64|65|66|67|68|69|6a|6b|6c|6d|6e|6f|70|71|72|73|74|75|76|77|78|79|7a|7b|7c|7d|7e)(?![0-9a-f]);?/i';
        $searchUnicodeEncodings = '/&#0{0,8}(33|34|35|36|37|38|39|40|41|42|43|45|47|48|49|50|51|52|53|54|55|56|57|58|59|61|63|64|65|66|67|68|69|70|71|72|73|74|75|76|77|78|79|80|81|82|83|84|85|86|87|88|89|90|91|92|93|94|95|96|97|98|99|100|101|102|103|104|105|106|107|108|109|110|111|112|113|114|115|116|117|118|119|120|121|122|123|124|125|126)(?![0-9]);?/i';
        // Callbacks replace the former /e (eval) modifier, which no longer exists in PHP 7+.
        $decodeHex = function ($match) {
            return chr(hexdec($match[1]));
        };
        $decodeDecimal = function ($match) {
            return chr((int) $match[1]);
        };
        // Loop because decoding can reveal another reference (e.g. "&#38;#106;" -> "&#106;" -> "j").
        while (preg_match($searchHexEncodings, $val) || preg_match($searchUnicodeEncodings, $val)) {
            $val = preg_replace_callback($searchHexEncodings, $decodeHex, $val);
            $val = preg_replace_callback($searchUnicodeEncodings, $decodeDecimal, $val);
        }

        // now the only remaining whitespace attacks are \t, \n, and \r
        $ra_protocol = array('javascript', 'vbscript', 'expression');
        $ra_tag = array('applet', 'meta', 'xml', 'blink', 'link', 'style', 'script', 'embed', 'object', 'iframe', 'frame', 'frameset', 'ilayer', 'layer', 'bgsound', 'title', 'base');
        $ra_attribute = array(
            'style', 'onabort', 'onactivate', 'onafterprint', 'onafterupdate', 'onbeforeactivate',
            'onbeforecopy', 'onbeforecut', 'onbeforedeactivate', 'onbeforeeditfocus', 'onbeforepaste',
            'onbeforeprint', 'onbeforeunload', 'onbeforeupdate', 'onblur', 'onbounce', 'oncellchange',
            'onchange', 'onclick', 'oncontextmenu', 'oncontrolselect', 'oncopy', 'oncut',
            'ondataavailable', 'ondatasetchanged', 'ondatasetcomplete', 'ondblclick', 'ondeactivate',
            'ondrag', 'ondragend', 'ondragenter', 'ondragleave', 'ondragover', 'ondragstart', 'ondrop',
            'onerror', 'onerrorupdate', 'onfilterchange', 'onfinish', 'onfocus', 'onfocusin',
            'onfocusout', 'onhelp', 'onkeydown', 'onkeypress', 'onkeyup', 'onlayoutcomplete', 'onload',
            'onlosecapture', 'onmousedown', 'onmouseenter', 'onmouseleave', 'onmousemove', 'onmouseout',
            'onmouseover', 'onmouseup', 'onmousewheel', 'onmove', 'onmoveend', 'onmovestart', 'onpaste',
            'onpropertychange', 'onreadystatechange', 'onreset', 'onresize', 'onresizeend',
            'onresizestart', 'onrowenter', 'onrowexit', 'onrowsdelete', 'onrowsinserted', 'onscroll',
            'onselect', 'onselectionchange', 'onselectstart', 'onstart', 'onstop', 'onsubmit', 'onunload',
        );
        // Every keyword once, in the order protocols, tags, attributes ('style' keeps its tag position).
        $keywords = array_values(array_unique(array_merge($ra_protocol, $ra_tag, $ra_attribute)));

        // Characters/entities that may be sprinkled between the letters of a keyword without
        // changing its meaning: TAB, LF, VT and CR as hex or decimal reference, or real whitespace.
        $ignorable = '((&#[xX]0{0,8}([9abd]);)|(&#0{0,8}(9|10|13);)|\s)*';

        // remove the potential &#xxx; stuff for testing
        $val2 = preg_replace('/(&#[xX]?0{0,8}(9|10|13|a|b|d);)*\s*/i', '', $val);

        // Build one (pattern, replacement) pair per keyword/type that could occur in the input.
        // Some keywords appear in more than one list; these get one pair per type.
        $rules = array();
        foreach ($keywords as $keyword) {
            // stripos is faster than the regular expressions used later
            // and because the words we're looking for only have chars < 0x80
            // we can use the non-multibyte safe version
            if (stripos($val2, $keyword) === false) {
                continue;
            }
            // the keyword with optional ignorable junk between every two letters
            $spaced = implode($ignorable, str_split($keyword));
            // add in <x> (or the custom string) after the second letter to nerf the keyword
            $replacement = substr_replace($keyword, $replaceString, 2, 0);

            // handle each type a little different (extra conditions to prevent false positives a bit better)
            if (in_array($keyword, $ra_protocol, true)) {
                // these take the form of e.g. 'javascript:'
                $rules[] = array('/' . $spaced . $ignorable . '(?=:)/i', $replacement);
            }
            if (in_array($keyword, $ra_tag, true)) {
                // these take the form of e.g. '<SCRIPT[^\da-z] ....'
                $rules[] = array('/(?<=<)' . $spaced . $ignorable . '(?=[^\da-z])/i', $replacement);
            }
            if (in_array($keyword, $ra_attribute, true)) {
                // these take the form of e.g. 'onload=' Beware that a lot of characters are allowed
                // between the attribute and the equal sign!
                $rules[] = array('/' . $spaced . '[\s\!\#\$\%\&\(\)\*\~\+\-\_\.\,\:\;\?\@\[\/\|\\\\\]\^\`]*(?==)/i', $replacement);
            }
        }

        // only process potential words
        if (count($rules) > 0) {
            // Keep replacing as long as the previous round replaced something. The comparison is
            // made once per complete round so that a keyword without matches cannot end the loop
            // while another keyword is still producing changes.
            do {
                $valBefore = $val;
                foreach ($rules as $rule) {
                    $val = preg_replace($rule[0], $rule[1], $val);
                }
            } while ($val !== $valBefore);
        }

        return $val;
    }
}

?>
1.8.0