|
TYPO3 API
SVNRelease
|
<?php

/**
 * This module implements a VERY limited parser that finds <link> tags
 * in the head of HTML or XHTML documents and parses out their
 * attributes according to the OpenID spec. It is a liberal parser,
 * but it requires these things from the data in order to work:
 *
 * - There must be an open <html> tag
 *
 * - There must be an open <head> tag inside of the <html> tag
 *
 * - Only <link>s that are found inside of the <head> tag are parsed
 *   (this is by design)
 *
 * - The parser follows the OpenID specification in resolving the
 *   attributes of the link tags. This means that the attributes DO
 *   NOT get resolved as they would by an XML or HTML parser. In
 *   particular, only certain entities get replaced, and href
 *   attributes do not get resolved relative to a base URL.
 *
 * From http://openid.net/specs.bml:
 *
 * - The openid.server URL MUST be an absolute URL. OpenID consumers
 *   MUST NOT attempt to resolve relative URLs.
 *
 * - The openid.server URL MUST NOT include entities other than &amp;,
 *   &lt;, &gt;, and &quot;.
 *
 * The parser ignores SGML comments and <![CDATA[blocks]]>. Both kinds
 * of quoting are allowed for attributes.
 *
 * The parser deals with invalid markup in these ways:
 *
 * - Tag names are not case-sensitive
 *
 * - The <html> tag is accepted even when it is not at the top level
 *
 * - The <head> tag is accepted even when it is not a direct child of
 *   the <html> tag, but a <html> tag must be an ancestor of the
 *   <head> tag
 *
 * - <link> tags are accepted even when they are not direct children
 *   of the <head> tag, but a <head> tag must be an ancestor of the
 *   <link> tag
 *
 * - If there is no closing tag for an open <html> or <head> tag, the
 *   remainder of the document is viewed as being inside of the
 *   tag. If there is no closing tag for a <link> tag, the link tag is
 *   treated as a short tag. Exceptions to this rule are that <html>
 *   closes <html> and <body> or <head> closes <head>
 *
 * - Attributes of the <link> tag are not required to be quoted.
 *
 * - In the case of duplicated attribute names, the attribute coming
 *   last in the tag will be the value returned.
 *
 * - Any text that does not parse as an attribute within a link tag
 *   will be ignored. (e.g. <link pumpkin rel='openid.server' /> will
 *   ignore pumpkin)
 *
 * - If there is more than one <html> or <head> tag, the parser only
 *   looks inside of the first one.
 *
 * - The contents of <script> tags are ignored entirely, except
 *   unclosed <script> tags. Unclosed <script> tags are ignored.
 *
 * - Any other invalid markup is ignored, including unclosed SGML
 *   comments and unclosed <![CDATA[blocks.
 *
 * PHP versions 4 and 5
 *
 * LICENSE: See the COPYING file included in this distribution.
 *
 * @access private
 * @package OpenID
 * @author JanRain, Inc. <openid@janrain.com>
 * @copyright 2005-2008 Janrain, Inc.
 * @license http://www.apache.org/licenses/LICENSE-2.0 Apache
 */

/**
 * Require Auth_OpenID::arrayGet().
 */
require_once "Auth/OpenID.php";

/**
 * Regex-based extractor for the attributes of <link> tags found in the
 * <head> of an HTML document, plus helpers to filter those links by
 * their "rel" attribute.
 */
class Auth_OpenID_Parse {

    /**
     * Specify some flags for use with regex matching.
     */
    var $_re_flags = "si";

    /**
     * Stuff to remove before we start looking for tags. Holds the bare
     * expression until the constructor wraps it in delimiters and flags.
     */
    var $_removed_re =
           "<!--.*?-->|<!\[CDATA\[.*?\]\]>|<script\b(?!:)[^>]*>.*?<\/script>";

    /**
     * Starts with the tag name at a word boundary, where the tag name
     * is not a namespace
     */
    var $_tag_expr = "<%s\b(?!:)([^>]*?)(?:\/>|>(.*?)(?:<\/?%s\s*>|\Z))";

    /**
     * Matches one name=value attribute (double-quoted, single-quoted or
     * bare value). Holds the bare expression until the constructor wraps
     * it in delimiters and flags.
     */
    var $_attr_find = '\b(\w+)=("[^"]*"|\'[^\']*\'|[^\'"\s\/<>]+)';

    /**
     * sprintf() templates (one %s / two %s for the tag name) used by
     * openTag() and closeTag().
     */
    var $_open_tag_expr = "<%s\b";
    var $_close_tag_expr = "<((\/%s\b)|(%s[^>\/]*\/))>";

    /**
     * Complete regex (delimiters and flags included) matching a <link>
     * tag; set by the constructor.
     */
    var $_link_find;

    /**
     * Map of entity name => replacement character; set by the
     * constructor.
     */
    var $_entity_replacements;

    /**
     * Bare expression matching any one of the supported entities; set
     * by the constructor.
     */
    var $_ent_replace;

    /**
     * Compile the regular expressions and build the entity table.
     *
     * Declared as __construct() because a method named after its class
     * is no longer treated as a constructor as of PHP 8.0.
     */
    function __construct()
    {
        $this->_link_find = sprintf("/<link\b(?!:)([^>]*)(?!<)>/%s",
                                    $this->_re_flags);

        $this->_entity_replacements = array(
                                            'amp' => '&',
                                            'lt' => '<',
                                            'gt' => '>',
                                            'quot' => '"'
                                            );

        $this->_attr_find = sprintf("/%s/%s",
                                    $this->_attr_find,
                                    $this->_re_flags);

        $this->_removed_re = sprintf("/%s/%s",
                                     $this->_removed_re,
                                     $this->_re_flags);

        // The alternation lists the entity NAMES (the array keys), not
        // the characters they decode to.
        $this->_ent_replace =
            sprintf("&(%s);", implode("|",
                                      array_keys($this->_entity_replacements)));
    }

    /**
     * Old-style constructor name, kept so that existing code invoking
     * it explicitly (e.g. parent::Auth_OpenID_Parse()) still works. It
     * simply forwards to __construct().
     */
    function Auth_OpenID_Parse()
    {
        $this->__construct();
    }

    /**
     * Returns a regular expression that will match a given tag in an
     * SGML string.
     *
     * @param string $tag_name The tag to match
     * @param array $close_tags Optional names of additional tags that
     * also terminate $tag_name
     * @return string A complete regex with delimiters and flags
     */
    function tagMatcher($tag_name, $close_tags = null)
    {
        if ($close_tags) {
            $options = implode("|", array_merge(array($tag_name), $close_tags));
            $closer = sprintf("(?:%s)", $options);
        } else {
            $closer = $tag_name;
        }

        $expr = sprintf($this->_tag_expr, $tag_name, $closer);
        return sprintf("/%s/%s", $expr, $this->_re_flags);
    }

    /**
     * Returns a regex matching the start of an opening $tag_name tag.
     */
    function openTag($tag_name)
    {
        $expr = sprintf($this->_open_tag_expr, $tag_name);
        return sprintf("/%s/%s", $expr, $this->_re_flags);
    }

    /**
     * Returns a regex matching either a closing $tag_name tag or a
     * self-closed $tag_name tag.
     */
    function closeTag($tag_name)
    {
        $expr = sprintf($this->_close_tag_expr, $tag_name, $tag_name);
        return sprintf("/%s/%s", $expr, $this->_re_flags);
    }

    /**
     * Locate the first opening <html> tag in $s.
     *
     * @param string $s The text to search
     * @return mixed The byte offset of the tag, or false if none
     */
    function htmlBegin($s)
    {
        $matches = array();
        $result = preg_match($this->openTag('html'), $s,
                             $matches, PREG_OFFSET_CAPTURE);
        if ($result === false || !$matches) {
            return false;
        }
        // Return the offset of the first match.
        return $matches[0][1];
    }

    /**
     * Locate the first closing (or self-closed) <html> tag in $s.
     *
     * @param string $s The text to search
     * @return mixed The byte offset at which that tag starts, or false
     * if none
     */
    function htmlEnd($s)
    {
        $matches = array();
        $result = preg_match($this->closeTag('html'), $s,
                             $matches, PREG_OFFSET_CAPTURE);
        if ($result === false || !$matches) {
            return false;
        }
        // Offset of the whole match, i.e. of the tag's "<". (Indexing
        // the last capture group instead yields an offset one past it.)
        return $matches[0][1];
    }

    /**
     * Returns the regex used to find the <head> element; <body> and
     * <html> tags are accepted as terminators as well.
     */
    function headFind()
    {
        return $this->tagMatcher('head', array('body', 'html'));
    }

    /**
     * Replace the entities listed in $this->_entity_replacements with
     * their characters.
     *
     * The substitution is done in a single pass so that already
     * replaced text is never scanned again: "&amp;lt;" becomes "&lt;",
     * not "<".
     *
     * @param string $str The attribute value to decode
     * @return string The decoded value
     */
    function replaceEntities($str)
    {
        $map = array();
        foreach ($this->_entity_replacements as $name => $char) {
            $map['&' . $name . ';'] = $char;
        }
        return strtr($str, $map);
    }

    /**
     * Strip one pair of matching surrounding quotes (double or single)
     * from $str; any other string is returned unchanged.
     */
    function removeQuotes($str)
    {
        $matches = array();
        $double = '/^"(.*)"$/';
        $single = "/^\'(.*)\'$/";

        if (preg_match($double, $str, $matches)) {
            return $matches[1];
        } else if (preg_match($single, $str, $matches)) {
            return $matches[1];
        } else {
            return $str;
        }
    }

    /**
     * Find all link tags in a string representing a HTML document and
     * return a list of their attributes.
     *
     * @param string $html The text to parse
     * @return array $list An array of arrays of attributes, one for each
     * link tag
     */
    function parseLinkAttrs($html)
    {
        $stripped = preg_replace($this->_removed_re,
                                 "",
                                 $html);

        if ($stripped === null) {
            // preg_replace() signals a PCRE failure with null; there is
            // nothing trustworthy to parse in that case.
            return array();
        }

        $html_begin = $this->htmlBegin($stripped);
        $html_end = $this->htmlEnd($stripped);

        if ($html_begin === false) {
            return array();
        }

        if ($html_end === false) {
            // Unclosed <html>: the rest of the document is inside it.
            $html_end = strlen($stripped);
        }

        $stripped = substr($stripped, $html_begin,
                           $html_end - $html_begin);

        // Try to find the <HEAD> tag.
        $head_re = $this->headFind();
        $head_matches = array();
        if (!preg_match($head_re, $stripped, $head_matches)) {
            return array();
        }

        $link_data = array();
        $link_matches = array();

        if (!preg_match_all($this->_link_find, $head_matches[0],
                            $link_matches)) {
            return array();
        }

        foreach ($link_matches[0] as $link) {
            $attr_matches = array();
            preg_match_all($this->_attr_find, $link, $attr_matches);
            $link_attrs = array();
            foreach ($attr_matches[0] as $index => $full_match) {
                $name = $attr_matches[1][$index];
                $value = $this->replaceEntities(
                              $this->removeQuotes($attr_matches[2][$index]));

                // Later duplicates overwrite earlier ones.
                $link_attrs[strtolower($name)] = $value;
            }
            $link_data[] = $link_attrs;
        }

        return $link_data;
    }

    /**
     * Does $target_rel appear among the whitespace-separated values of
     * $rel_attr? The comparison is case-insensitive.
     *
     * @param string $rel_attr The value of a rel attribute
     * @param string $target_rel The relationship to look for
     * @return int 1 if present, 0 otherwise
     */
    function relMatches($rel_attr, $target_rel)
    {
        // XXX: TESTME
        $target_rel = strtolower($target_rel);
        $rels = preg_split("/\s+/", trim($rel_attr));
        foreach ($rels as $rel) {
            if (strtolower($rel) === $target_rel) {
                return 1;
            }
        }

        return 0;
    }

    /**
     * Does this link have target_rel as a relationship?
     *
     * @param array $link_attrs Attributes of one link tag, keyed by
     * lower-cased attribute name
     * @param string $target_rel The relationship to look for
     * @return bool
     */
    function linkHasRel($link_attrs, $target_rel)
    {
        // XXX: TESTME
        $rel_attr = Auth_OpenID::arrayGet($link_attrs, 'rel', null);
        return ($rel_attr && $this->relMatches($rel_attr,
                                               $target_rel));
    }

    /**
     * Filter the list of link attributes on whether it has
     * target_rel as a relationship.
     *
     * @param array $link_attrs_list A list as returned by
     * parseLinkAttrs()
     * @param string $target_rel The relationship to look for
     * @return array The matching entries, in their original order
     */
    function findLinksRel($link_attrs_list, $target_rel)
    {
        // XXX: TESTME
        $result = array();
        foreach ($link_attrs_list as $attr) {
            if ($this->linkHasRel($attr, $target_rel)) {
                $result[] = $attr;
            }
        }

        return $result;
    }

    /**
     * Return the value of the href attribute for the first link
     * tag in the list that has target_rel as a relationship.
     *
     * @param array $link_attrs_list A list as returned by
     * parseLinkAttrs()
     * @param string $target_rel The relationship to look for
     * @return mixed The href value, or null when no link matches or
     * the first matching link has no href
     */
    function findFirstHref($link_attrs_list, $target_rel)
    {
        // XXX: TESTME
        $matches = $this->findLinksRel($link_attrs_list,
                                       $target_rel);
        if (!$matches) {
            return null;
        }
        $first = $matches[0];
        return Auth_OpenID::arrayGet($first, 'href', null);
    }
}

/**
 * Extract the server and delegate URLs from the <link> tags of an HTML
 * document.
 *
 * @param string $html_text The HTML document text
 * @param string $server_rel The rel value identifying the server link
 * @param string $delegate_rel The rel value identifying the delegate
 * link
 * @return mixed false when no server link is found; otherwise
 * array($delegate_url, $server_url), where $delegate_url is null if
 * there is no delegate link
 */
function Auth_OpenID_legacy_discover($html_text, $server_rel,
                                     $delegate_rel)
{
    $p = new Auth_OpenID_Parse();

    $link_attrs = $p->parseLinkAttrs($html_text);

    $server_url = $p->findFirstHref($link_attrs,
                                    $server_rel);

    if ($server_url === null) {
        return false;
    } else {
        $delegate_url = $p->findFirstHref($link_attrs,
                                          $delegate_rel);
        return array($delegate_url, $server_url);
    }
}

?>
1.8.0