RemoveXSS.php

Go to the documentation of this file.
00001 <?php
00002 /**
00003  * Usage: Run *every* variable passed in through it.
00004  * The goal of this function is to be a generic function that can be used to
00005  * parse almost any input and render it XSS safe. For more information on
00006  * actual XSS attacks, check out http://ha.ckers.org/xss.html. Another
00007  * excellent site is the XSS Database which details each attack and how it
00008  * works.
00009  *
00010  * Used with permission by the author.
00011  * URL: http://quickwired.com/smallprojects/php_xss_filter_function.php
00012  *
00013  * Check XSS attacks on http://ha.ckers.org/xss.html
00014  *
00015  * License:
00016  * This code is public domain, you are free to do whatever you want with it,
00017  * including adding it to your own project which can be under any license.
00018  *
00019  * $Id: RemoveXSS.php 4457 2008-11-12 17:15:55Z ohader $
00020  *
00021  * @author  Travis Puderbaugh <kallahar@quickwired.com>
00022  * @author  Jigal van Hemert <jigal@xs4all.nl>
00023  * @package RemoveXSS
00024  */
final class RemoveXSS {
    /**
     * Regex fragment matching everything that may be squeezed between two
     * letters of a keyword without breaking it: TAB/LF/VT/CR written as hex
     * entities, TAB/LF/CR written as decimal entities, or literal whitespace.
     *
     * @var string
     */
    private static $separatorPattern = '((&#[xX]0{0,8}([9abd]);)|(&#0{0,8}(9|10|13);)|\s)*';

    /**
     * Keywords that are dangerous when followed by a colon, e.g. 'javascript:'.
     *
     * @var array
     */
    private static $protocolKeywords = array('javascript', 'vbscript', 'expression');

    /**
     * Keywords that are dangerous directly after '<', e.g. '<script'.
     *
     * @var array
     */
    private static $tagKeywords = array('applet', 'meta', 'xml', 'blink', 'link', 'style', 'script', 'embed', 'object', 'iframe', 'frame', 'frameset', 'ilayer', 'layer', 'bgsound', 'title', 'base');

    /**
     * Keywords that are dangerous when followed by '=', e.g. 'onload='.
     *
     * @var array
     */
    private static $attributeKeywords = array('style', 'onabort', 'onactivate', 'onafterprint', 'onafterupdate', 'onbeforeactivate', 'onbeforecopy', 'onbeforecut', 'onbeforedeactivate', 'onbeforeeditfocus', 'onbeforepaste', 'onbeforeprint', 'onbeforeunload', 'onbeforeupdate', 'onblur', 'onbounce', 'oncellchange', 'onchange', 'onclick', 'oncontextmenu', 'oncontrolselect', 'oncopy', 'oncut', 'ondataavailable', 'ondatasetchanged', 'ondatasetcomplete', 'ondblclick', 'ondeactivate', 'ondrag', 'ondragend', 'ondragenter', 'ondragleave', 'ondragover', 'ondragstart', 'ondrop', 'onerror', 'onerrorupdate', 'onfilterchange', 'onfinish', 'onfocus', 'onfocusin', 'onfocusout', 'onhelp', 'onkeydown', 'onkeypress', 'onkeyup', 'onlayoutcomplete', 'onload', 'onlosecapture', 'onmousedown', 'onmouseenter', 'onmouseleave', 'onmousemove', 'onmouseout', 'onmouseover', 'onmouseup', 'onmousewheel', 'onmove', 'onmoveend', 'onmovestart', 'onpaste', 'onpropertychange', 'onreadystatechange', 'onreset', 'onresize', 'onresizeend', 'onresizestart', 'onrowenter', 'onrowexit', 'onrowsdelete', 'onrowsinserted', 'onscroll', 'onselect', 'onselectionchange', 'onselectstart', 'onstart', 'onstop', 'onsubmit', 'onunload');

    /**
     * Removes potential XSS code from an input string.
     * Wrapper for RemoveXSS::process().
     *
     * Using an external class by Travis Puderbaugh <kallahar@quickwired.com>
     *
     * Note: because this method is named like its class, PHP versions before
     * 8.0 treat it as an old-style constructor. Prefer the static call.
     *
     * @param   string      $val            Input string
     * @param   string      $replaceString  String inserted into keywords (which destroys the tags)
     * @return  string      Input string with potential XSS code removed
     * @deprecated since TYPO3 4.3, use static call RemoveXSS::process() instead
     */
    public function RemoveXSS($val, $replaceString = '<x>') {
        return self::process($val, $replaceString);
    }

    /**
     * Removes potential XSS code from an input string.
     *
     * Steps: strip control characters, decode numeric entities of plain
     * printable ASCII characters, then break every blacklisted keyword
     * (protocol, tag or attribute) by inserting $replaceString after its
     * second character.
     *
     * Using an external class by Travis Puderbaugh <kallahar@quickwired.com>
     *
     * @param   string      $val            Input string
     * @param   string      $replaceString  String inserted into keywords (which destroys the tags)
     * @return  string      Input string with potential XSS code removed
     */
    public static function process($val, $replaceString = '<x>') {
        // an empty $replaceString would leave the keywords intact, so fall back to the default
        if ((string) $replaceString === '') {
            $replaceString = '<x>';
        }

        // Remove all control characters except TAB (0x09), LF (0x0a) and CR (0x0d).
        // This prevents keyword splitting such as <java\0script>. It has to be ONE
        // character class: a sequence of classes would only match that exact
        // sequence of control characters and let a single NUL byte through.
        // Splits with \t, \n and \r are handled by the keyword patterns below.
        $val = preg_replace('/[\x00-\x08\x0b\x0c\x0e-\x1f]/', '', (string) $val);

        // Straight replacements, the user should never need these since they're normal characters.
        // This prevents input like <IMG SRC=&#X40&#X61&#X76&#X61&#X73&#X63...>
        // Callbacks are used because the /e (eval) modifier was removed in PHP 7.
        $search = '/&#[xX]0{0,8}(21|22|23|24|25|26|27|28|29|2a|2b|2d|2f|30|31|32|33|34|35|36|37|38|39|3a|3b|3d|3f|40|41|42|43|44|45|46|47|48|49|4a|4b|4c|4d|4e|4f|50|51|52|53|54|55|56|57|58|59|5a|5b|5c|5d|5e|5f|60|61|62|63|64|65|66|67|68|69|6a|6b|6c|6d|6e|6f|70|71|72|73|74|75|76|77|78|79|7a|7b|7c|7d|7e);?/i';
        $val = preg_replace_callback($search, array(__CLASS__, 'decodeHexEntity'), $val);
        $search = '/&#0{0,8}(33|34|35|36|37|38|39|40|41|42|43|45|47|48|49|50|51|52|53|54|55|56|57|58|59|61|63|64|65|66|67|68|69|70|71|72|73|74|75|76|77|78|79|80|81|82|83|84|85|86|87|88|89|90|91|92|93|94|95|96|97|98|99|100|101|102|103|104|105|106|107|108|109|110|111|112|113|114|115|116|117|118|119|120|121|122|123|124|125|126);?/i';
        $val = preg_replace_callback($search, array(__CLASS__, 'decodeDecimalEntity'), $val);

        // Copy with all whitespace and whitespace entities removed; it is only used
        // to find out cheaply which keywords can occur at all.
        $val2 = preg_replace('/(&#[xX]?0{0,8}(9|10|13|a|b|d);)*\s*/i', '', $val);

        // Every keyword once, in the order protocols, tags, attributes.
        $keywords = array_unique(array_merge(self::$protocolKeywords, self::$tagKeywords, self::$attributeKeywords));

        $patterns = array();
        $replacements = array();
        foreach ($keywords as $keyword) {
            // stripos is faster than the regular expressions used later, and since
            // the keywords only contain chars < 0x80 the non-multibyte version is safe
            if (stripos($val2, $keyword) === false) {
                continue;
            }
            // insert the replace string after the second character to nerf the keyword
            $replacement = substr_replace($keyword, $replaceString, 2, 0);
            // some keywords (e.g. 'style') belong to more than one list;
            // they get one pattern per type
            if (in_array($keyword, self::$protocolKeywords, true)) {
                $patterns[] = self::buildPattern($keyword, 'protocol');
                $replacements[] = $replacement;
            }
            if (in_array($keyword, self::$tagKeywords, true)) {
                $patterns[] = self::buildPattern($keyword, 'tag');
                $replacements[] = $replacement;
            }
            if (in_array($keyword, self::$attributeKeywords, true)) {
                $patterns[] = self::buildPattern($keyword, 'attribute');
                $replacements[] = $replacement;
            }
        }

        // only process potential words
        if (count($patterns) > 0) {
            // Keep replacing as long as a complete pass over all patterns changed
            // something. The comparison is made once per pass, so a keyword that
            // does not match cannot end the repetition for the other keywords.
            do {
                $valBefore = $val;
                foreach ($patterns as $index => $pattern) {
                    $val = preg_replace($pattern, $replacements[$index], $val);
                }
            } while ($val !== $valBefore);
        }

        return $val;
    }

    /**
     * Builds the case-insensitive regular expression that finds one keyword,
     * even if its letters are separated by whitespace or whitespace entities.
     * Each type gets extra conditions to reduce false positives.
     *
     * @param   string      $keyword    Keyword to search for (plain letters only)
     * @param   string      $type       One of 'protocol', 'tag' or 'attribute'
     * @return  string      Complete pattern including delimiters and modifier
     */
    private static function buildPattern($keyword, $type) {
        $separator = self::$separatorPattern;
        $pattern = implode($separator, str_split($keyword));

        switch ($type) {
            case 'protocol':
                // these take the form of e.g. 'javascript:'
                $pattern .= $separator . '(?=:)';
                break;
            case 'tag':
                // these take the form of e.g. '<SCRIPT[^\da-z] ....'
                $pattern = '(?<=<)' . $pattern . $separator . '(?=[^\da-z])';
                break;
            case 'attribute':
                // these take the form of e.g. 'onload='  Beware that a lot of characters
                // are allowed between the attribute and the equal sign!
                $pattern .= '[\s\!\#\$\%\&\(\)\*\~\+\-\_\.\,\:\;\?\@\[\/\|\\\\\]\^\`]*(?==)';
                break;
        }

        return '/' . $pattern . '/i';
    }

    /**
     * preg_replace_callback handler: turns the hexadecimal character code
     * captured in group 1 into the character itself.
     *
     * @param   array       $match      Match data, group 1 holds the hex digits
     * @return  string      The decoded single character
     */
    private static function decodeHexEntity(array $match) {
        return chr(hexdec($match[1]));
    }

    /**
     * preg_replace_callback handler: turns the decimal character code
     * captured in group 1 into the character itself.
     *
     * @param   array       $match      Match data, group 1 holds the decimal digits
     * @return  string      The decoded single character
     */
    private static function decodeDecimalEntity(array $match) {
        return chr((int) $match[1]);
    }
}
00142 
00143 ?>

Generated on Sat Jan 3 04:23:28 2009 for TYPO3 API by  doxygen 1.4.7