00001 <?php
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106 class t3lib_parsehtml {
00107
00108 protected $caseShift_cache = array();
00109
00110
00111
00112
00113
00114
00115
00116
00117
/**
 * Returns the text located between the first two occurrences of $marker.
 *
 * If the markers were wrapped in HTML comments, the comment remainders that
 * end up at the edges of the extracted text (a leading "... -->" and/or a
 * trailing "<!-- ...") are stripped off.
 *
 * @param string $content Content to search in
 * @param string $marker Marker string delimiting the subpart
 * @return string The subpart, or an empty string when the marker does not occur twice
 */
public static function getSubpart($content, $marker) {
    $openPos = strpos($content, $marker);
    if ($openPos === false) {
        return '';
    }

    $innerStart = $openPos + strlen($marker);
    $closePos = strpos($content, $marker, $innerStart);
    if ($closePos === false) {
        return '';
    }

    $inner = substr($content, $innerStart, $closePos - $innerStart);

    // Each entry: pattern to try, and the capture group holding the result.
    // Tried in order: comment remainders on both sides, trailing only, leading only.
    $attempts = array(
        array('/^([^\<]*\-\-\>)(.*)(\<\!\-\-[^\>]*)$/s', 2),
        array('/(.*)(\<\!\-\-[^\>]*)$/s', 1),
        array('/^([^\<]*\-\-\>)(.*)$/s', 2),
    );

    foreach ($attempts as $attempt) {
        $hit = array();
        if (preg_match($attempt[0], $inner, $hit) === 1) {
            return $hit[$attempt[1]];
        }
    }

    return $inner;
}
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
/**
 * Replaces the subpart delimited by two occurrences of $marker.
 *
 * @param string $content Content containing the subpart
 * @param string $marker Marker string delimiting the subpart
 * @param mixed $subpartContent Replacement string, or a two-element array whose
 *        entries are put before and after the existing subpart content
 * @param boolean $recursive If set, the text after the subpart is processed the same way
 * @param boolean $keepMarker If set, the markers (and any HTML comment remainders
 *        around them) are kept in the output
 * @return string Processed content; $content unchanged if the marker does not occur twice
 */
public static function substituteSubpart($content, $marker, $subpartContent, $recursive = 1, $keepMarker = 0) {
    $firstPos = strpos($content, $marker);
    if ($firstPos === false) {
        return $content;
    }

    $markerLen = strlen($marker);
    $innerFrom = $firstPos + $markerLen;
    $secondPos = strpos($content, $marker, $innerFrom);
    if ($secondPos === false) {
        return $content;
    }

    $head = substr($content, 0, $firstPos);
    $body = substr($content, $innerFrom, $secondPos - $innerFrom);
    $tail = substr($content, $secondPos + $markerLen);

    if ($recursive) {
        $tail = self::substituteSubpart($tail, $marker, $subpartContent, $recursive, $keepMarker);
    }

    $wrapsBody = is_array($subpartContent);
    $hit = array();

    if (!$keepMarker) {
        // Drop the opening "<!-- ..." that preceded the first marker
        if (preg_match('/^(.*)\<\!\-\-[^\>]*$/s', $head, $hit) === 1) {
            $head = $hit[1];
        }

        // The existing body is only reused when wrapping, so only then clean its edges
        if ($wrapsBody) {
            $hit = array();
            if (preg_match('/^([^\<]*\-\-\>)(.*)(\<\!\-\-[^\>]*)$/s', $body, $hit) === 1) {
                $body = $hit[2];
            } elseif (preg_match('/^(.*)(\<\!\-\-[^\>]*)$/s', $body, $hit) === 1) {
                $body = $hit[1];
            } elseif (preg_match('/^([^\<]*\-\-\>)(.*)$/s', $body, $hit) === 1) {
                $body = $hit[2];
            }
        }

        // Drop the closing "... -->" that followed the second marker
        $hit = array();
        if (preg_match('/^[^\<]*\-\-\>(.*)$/s', $tail, $hit) === 1) {
            $tail = $hit[1];
        }
    } elseif (preg_match('/^([^\<]*\-\-\>)(.*)(\<\!\-\-[^\>]*)$/s', $body, $hit) === 1) {
        // Comment remainders on both sides of the body: move them next to the markers
        $head .= $marker . $hit[1];
        $body = $hit[2];
        $tail = $hit[3] . $marker . $tail;
    } elseif (preg_match('/^(.*)(\<\!\-\-[^\>]*)$/s', $body, $hit) === 1) {
        $head .= $marker;
        $body = $hit[1];
        $tail = $hit[2] . $marker . $tail;
    } elseif (preg_match('/^([^\<]*\-\-\>)(.*)$/s', $body, $hit) === 1) {
        $head .= $marker . $hit[1];
        $body = $hit[2];
        $tail = $marker . $tail;
    } else {
        $head .= $marker;
        $tail = $marker . $tail;
    }

    $replacement = $wrapsBody
        ? $subpartContent[0] . $body . $subpartContent[1]
        : $subpartContent;

    return $head . $replacement . $tail;
}
00243
00244
00245
00246
00247
00248
00249
00250
/**
 * Substitutes several subparts in one go by calling substituteSubpart()
 * once per array entry, in array order.
 *
 * @param string $content Content containing the subparts
 * @param array $subpartsContent Map of subpart marker => replacement
 * @return string Content with all listed subparts substituted
 */
public static function substituteSubpartArray($content, array $subpartsContent) {
    $markers = array_keys($subpartsContent);

    foreach ($markers as $subpartMarker) {
        $replacement = $subpartsContent[$subpartMarker];
        $content = self::substituteSubpart($content, $subpartMarker, $replacement);
    }

    return $content;
}
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273
/**
 * Replaces every occurrence of $marker in $content with $markContent.
 * This is a plain str_replace(); the marker is used exactly as given.
 *
 * @param string $content Content containing the marker
 * @param string $marker Marker string to look for
 * @param mixed $markContent Replacement passed on to str_replace()
 * @return string Content with the marker substituted
 */
public static function substituteMarker($content, $marker, $markContent) {
    $substituted = str_replace($marker, $markContent, $content);

    return $substituted;
}
00277
00278
00279
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293
00294
00295
00296
/**
 * Replaces a set of markers in $content.
 *
 * @param string $content Content containing the markers
 * @param array $markContentArray Map of marker name => replacement. If this is
 *        not an array, $content is returned untouched.
 * @param string $wrap Optional "prefix|suffix" wrapped around every marker name
 *        before searching, e.g. "###|###"
 * @param boolean $uppercase If set, marker names are upper-cased (a-z only) first
 * @param boolean $deleteUnused If set, remaining wrapped markers are removed.
 *        When $wrap is empty, "###" is assumed on both sides for this step.
 * @return string Processed content
 */
public static function substituteMarkerArray($content, $markContentArray, $wrap = '', $uppercase = 0, $deleteUnused = 0) {
    if (!is_array($markContentArray)) {
        return $content;
    }

    $wrapArr = t3lib_div::trimExplode('|', $wrap);
    // An empty wrap or one without "|" yields fewer than two parts; treat the
    // missing side as empty instead of reading an undefined offset.
    $wrapStart = isset($wrapArr[0]) ? $wrapArr[0] : '';
    $wrapEnd = isset($wrapArr[1]) ? $wrapArr[1] : '';

    foreach ($markContentArray as $marker => $markContent) {
        if ($uppercase) {
            // strtr() with explicit alphabets only touches a-z, whatever the locale
            $marker = strtr(
                $marker,
                'abcdefghijklmnopqrstuvwxyz',
                'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
            );
        }

        $content = str_replace($wrapStart . $marker . $wrapEnd, $markContent, $content);
    }

    if ($deleteUnused) {
        if (empty($wrap)) {
            $wrapStart = '###';
            $wrapEnd = '###';
        }

        // The wrap parts are quoted including the "/" delimiter, and "-" is
        // escaped so that it is a literal dash and not part of a "_-|" range.
        $unusedMarkerPattern = '/' . preg_quote($wrapStart, '/') . '([A-Z0-9_|\-]*)' . preg_quote($wrapEnd, '/') . '/is';
        $content = preg_replace($unusedMarkerPattern, '', $content);
    }

    return $content;
}
00329
00330
00331
00332
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352
/**
 * Splits $content into an array where odd keys hold complete blocks of the
 * given tags (start tag, content, end tag; nested blocks of the listed tags
 * stay inside the outer block) and even keys hold the content in between.
 *
 * @param string $tag Comma list of tag names to split by
 * @param string $content HTML content
 * @param boolean $eliminateExtraEndTags If set, end tags without a matching
 *        start tag are dropped from the output
 * @return array Alternating outside-content / tag-block strings
 */
function splitIntoBlock($tag,$content,$eliminateExtraEndTags=0) {
    $tags = array_unique(t3lib_div::trimExplode(',', $tag, 1));
    $regexStr = '/\<\/?('.implode('|', $tags).')(\s*\>|\s[^\>]*\>)/si';

    $parts = preg_split($regexStr, $content);

    $newParts = array();
    // $pointer tracks the position in $content of the tag that precedes the current part
    $pointer = strlen($parts[0]);
    $buffer = $parts[0];
    $nested = 0;

    // The first part was consumed above; each() was removed in PHP 8, so the
    // remaining parts are iterated with foreach.
    foreach (array_slice($parts, 1) as $v) {
        $isEndTag = substr($content, $pointer, 2) == '</' ? 1 : 0;
        $tagLen = strcspn(substr($content, $pointer), '>') + 1;

        if (!$isEndTag) {
            // Start tag: at nesting level 0 the content collected so far is flushed
            if (!$nested) {
                $newParts[] = $buffer;
                $buffer = '';
            }
            $nested++;
            $mbuffer = substr($content, $pointer, strlen($v) + $tagLen);
            $pointer += strlen($mbuffer);
            $buffer .= $mbuffer;
        } else {
            $nested--;
            $eliminated = 0;
            if ($eliminateExtraEndTags && $nested < 0) {
                // Surplus end tag: skip it and stay at level 0
                $nested = 0;
                $eliminated = 1;
            } else {
                $buffer .= substr($content, $pointer, $tagLen);
            }
            $pointer += $tagLen;
            if (!$nested && !$eliminated) {
                // Outermost block closed: flush it
                $newParts[] = $buffer;
                $buffer = '';
            }
            $mbuffer = substr($content, $pointer, strlen($v));
            $pointer += strlen($mbuffer);
            $buffer .= $mbuffer;
        }
    }

    $newParts[] = $buffer;
    return $newParts;
}
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410
00411
00412
00413
/**
 * Splits $content into tag blocks with splitIntoBlock() (surplus end tags
 * eliminated), recurses into every block and lets callback methods on
 * $procObj rewrite the pieces.
 *
 * @param string $tag Comma list of tag names to split by
 * @param string $content HTML content
 * @param object $procObj Object providing the callback methods
 * @param string $callBackContent Method name called as ($content, $level) for content
 *        outside tag blocks; skipped if empty
 * @param string $callBackTags Method name called as ($tagsArray, $level) for each tag block;
 *        must return the (modified) array with the keys 'tag_start', 'content' and 'tag_end';
 *        skipped if empty
 * @param integer $level Current nesting level
 * @return string Re-assembled content
 */
function splitIntoBlockRecursiveProc($tag,$content,&$procObj,$callBackContent,$callBackTags,$level=0) {
    $segments = $this->splitIntoBlock($tag, $content, TRUE);

    foreach ($segments as $idx => $segment) {
        if (!($idx % 2)) {
            // Even index: content outside the tag blocks
            if ($callBackContent) {
                $segments[$idx] = $procObj->$callBackContent($segments[$idx], $level);
            }
            continue;
        }

        // Odd index: a complete tag block
        $firstTagName = $this->getFirstTagName($segment, TRUE);
        $tagsArray = array(
            'tag_start' => $this->getFirstTag($segment),
            'tag_end' => '</'.$firstTagName.'>',
            'tag_name' => strtolower($firstTagName),
            'add_level' => 1,
        );
        $tagsArray['content'] = $this->splitIntoBlockRecursiveProc(
            $tag,
            $this->removeFirstAndLastTag($segment),
            $procObj,
            $callBackContent,
            $callBackTags,
            $level + $tagsArray['add_level']
        );

        if ($callBackTags) {
            $tagsArray = $procObj->$callBackTags($tagsArray, $level);
        }

        $segments[$idx] = $tagsArray['tag_start'].$tagsArray['content'].$tagsArray['tag_end'];
    }

    return implode('', $segments);
}
00439
00440
00441
00442
00443
00444
00445
00446
00447
00448
00449
/**
 * Splits $content by single occurrences of the given tags. Odd keys of the
 * result hold the matched tags, even keys the content in between.
 *
 * @param string $tag Comma list of tag names to split by
 * @param string $content HTML content
 * @return array Alternating content / tag strings
 */
function splitTags($tag,$content) {
    $tags = t3lib_div::trimExplode(',', $tag, 1);
    $regexStr = '/\<('.implode('|', $tags).')(\s[^>]*)?\/?>/si';
    $parts = preg_split($regexStr, $content);

    // $pointer is the position in $content of the tag preceding the current part
    $pointer = strlen($parts[0]);
    $newParts = array($parts[0]);

    // each() was removed in PHP 8; the first part is already stored, so skip it.
    foreach (array_slice($parts, 1) as $followingContent) {
        $tagLen = strcspn(substr($content, $pointer), '>') + 1;

        // Recover the tag itself from the original content
        $matchedTag = substr($content, $pointer, $tagLen);
        $newParts[] = $matchedTag;
        $pointer += strlen($matchedTag);

        // Then the content up to the next tag
        $newParts[] = $followingContent;
        $pointer += strlen($followingContent);
    }

    return $newParts;
}
00474
00475
00476
00477
00478
00479
00480
00481
00482
00483
/**
 * Picks every second element from an array as produced by the split methods
 * of this class.
 *
 * @param array $parts Array with alternating entries
 * @param boolean $tag_parts If set, entries with odd keys are returned, otherwise those with even keys
 * @param boolean $include_tag If not set, each returned entry is passed through removeFirstAndLastTag()
 * @return array Re-indexed list of the selected entries
 */
function getAllParts($parts,$tag_parts=1,$include_tag=1) {
    $keyOffset = $tag_parts ? 0 : 1;
    $selected = array();

    foreach ($parts as $key => $part) {
        if ((($key + $keyOffset) % 2) == 0) {
            continue;
        }
        $selected[] = $include_tag ? $part : $this->removeFirstAndLastTag($part);
    }

    return $selected;
}
00494
00495
00496
00497
00498
00499
00500
00501
/**
 * Returns what lies between the first ">" and the last "<" of $str, i.e. the
 * content of a block without its outer start and end tag.
 *
 * @param string $str String that starts with a tag and ends with a tag
 * @return string The inner content
 */
function removeFirstAndLastTag($str) {
    $innerStart = strpos($str, '>') + 1;
    $lastTagStart = strrpos($str, '<');

    return substr($str, $innerStart, $lastTagStart - $innerStart);
}
00510
00511
00512
00513
00514
00515
00516
00517
/**
 * Returns everything from the start of $str up to and including the first ">".
 *
 * @param string $str String expected to begin with a tag
 * @return string The leading part ending with the first ">"
 */
function getFirstTag($str) {
    $lengthIncludingBracket = strpos($str, '>') + 1;

    return substr($str, 0, $lengthIncludingBracket);
}
00523
00524
00525
00526
00527
00528
00529
00530
00531
/**
 * Returns the name of the tag that $str begins with (leading whitespace allowed).
 *
 * @param string $str String expected to begin with a tag
 * @param boolean $preserveCase If not set, the name is returned in upper case
 * @return string Tag name, or an empty string if $str does not begin with a tag
 */
function getFirstTagName($str,$preserveCase=FALSE) {
    $found = array();
    if (preg_match('/^\s*\<([^\s\>]+)(\s|\>)/', $str, $found) !== 1) {
        return '';
    }

    return $preserveCase ? $found[1] : strtoupper($found[1]);
}
00542
00543
00544
00545
00546
00547
00548
00549
00550
/**
 * Parses the attributes of a tag into an array.
 *
 * @param string $tag Tag, or just its attribute list
 * @param boolean $deHSC If set, attribute values are passed through t3lib_div::htmlspecialchars_decode()
 * @return array Index 0: map of lower-cased attribute name => value.
 *         Index 1: map of lower-cased attribute name => meta data with the key
 *         'origTag' (name as written) and, for attributes that had a value,
 *         'dashType' (the quote character used, or an empty string).
 */
function get_tag_attributes($tag,$deHSC=0) {
    list($components, $metaC) = $this->split_tag_attributes($tag);
    $name = '';
    $valuemode = false;
    $attributes = array();
    $attributesMeta = array();

    if (is_array($components)) {
        foreach ($components as $key => $val) {
            // split_tag_attributes() strips quotes, so a quoted value "=" arrives
            // here as a bare "=". Only an unquoted "=" is the assignment operator.
            $isQuoted = isset($metaC[$key]) && $metaC[$key] !== '';
            if ($val === '=' && !$isQuoted) {
                $valuemode = true;
                continue;
            }

            if ($valuemode) {
                // Value for the attribute name seen before the "="
                if ($name) {
                    $attributes[$name] = $deHSC ? t3lib_div::htmlspecialchars_decode($val) : $val;
                    $attributesMeta[$name]['dashType'] = $metaC[$key];
                    $name = '';
                }
            } else {
                // Attribute name: keep only characters that are valid in a name
                if ($namekey = preg_replace('/[^[:alnum:]_\:\-]/','',$val)) {
                    $name = strtolower($namekey);
                    $attributesMeta[$name] = array();
                    $attributesMeta[$name]['origTag'] = $namekey;
                    $attributes[$name] = '';
                }
            }
            $valuemode = false;
        }
    }

    // Always return the two-element structure, also when nothing could be parsed
    return array($attributes, $attributesMeta);
}
00582
00583
00584
00585
00586
00587
00588
00589
00590
00591
/**
 * Splits the attribute part of a tag into its tokens: names, "=" signs and values.
 * A leading "<tagname " and a trailing ">" are ignored.
 *
 * @param string $tag Tag, or just its attribute list
 * @return array Index 0: list of tokens with surrounding quotes removed.
 *         Index 1: parallel list with the quote character of each token, or '' if unquoted.
 */
function split_tag_attributes($tag) {
    $outer = array();
    if (preg_match('/(\<[^\s]+\s+)?(.*?)\s*(\>)?$/s', $tag, $outer)!==1) {
        return array(array(), array());
    }

    $value = array();
    $metaValue = array();
    $tokens = array();
    $tokenCount = preg_match_all('/("[^"]*"|\'[^\']*\'|[^\s"\'\=]+|\=)/s', $outer[2], $tokens);

    if ($tokenCount > 0) {
        foreach ($tokens[1] as $token) {
            $lead = substr($token, 0, 1);
            $isQuoted = ($lead=='"' || $lead=="'");

            $metaValue[] = $isQuoted ? $lead : '';
            $value[] = $isQuoted ? substr($token, 1, -1) : $token;
        }
    }

    return array($value, $metaValue);
}
00616
00617
00618
00619
00620
00621
00622
00623
00624
00625
00626
00627
00628
00629
/**
 * Counts start and end tags in the lower-cased $content and reports imbalances.
 *
 * @param string $content HTML content to analyse
 * @param string $blockTags Comma list of tags that are expected to have an end tag
 * @param string $soloTags Comma list of tags that are not expected to have an end tag
 * @return array Array with the keys:
 *         'counts'   => tag => number of start tags (only tags that occur),
 *         'errors'   => block tag => message when there are more start than end tags,
 *         'warnings' => tag => message for surplus end tags / end tags of solo tags,
 *         'blocks'   => block tag => array(start count, end count, difference),
 *         'solo'     => solo tag => array(start count, end count)
 */
function checkTagTypeCounts($content,$blockTags='a,b,blockquote,body,div,em,font,form,h1,h2,h3,h4,h5,h6,i,li,map,ol,option,p,pre,select,span,strong,table,td,textarea,tr,u,ul', $soloTags='br,hr,img,input,area') {
    $haystack = strtolower($content);
    $report = array(
        'counts' => array(),
        'errors' => array(),
        'warnings' => array(),
        'blocks' => array(),
        'solo' => array(),
    );

    // Block tags: start and end tags should be balanced
    foreach (explode(',', $blockTags) as $tagName) {
        // Splitting by the tag yields one more piece than there are occurrences
        $opened = count(preg_split('/\<'.$tagName.'(\s|\>)/s', $haystack)) - 1;
        $closed = count(preg_split('/\<\/'.$tagName.'(\s|\>)/s', $haystack)) - 1;
        $surplus = $opened - $closed;

        $report['blocks'][$tagName] = array($opened, $closed, $surplus);
        if ($opened) {
            $report['counts'][$tagName] = $opened;
        }

        if ($surplus > 0) {
            $report['errors'][$tagName] = 'There were more start-tags ('.$opened.') than end-tags ('.$closed.') for the element "'.$tagName.'". There should be an equal amount!';
        } elseif ($surplus < 0) {
            $report['warnings'][$tagName] = 'There were more end-tags ('.$closed.') than start-tags ('.$opened.') for the element "'.$tagName.'". There should be an equal amount! However the problem is not fatal.';
        }
    }

    // Solo tags: any end tag is unexpected
    foreach (explode(',', $soloTags) as $tagName) {
        $opened = count(preg_split('/\<'.$tagName.'(\s|\>)/s', $haystack)) - 1;
        $closed = count(preg_split('/\<\/'.$tagName.'(\s|\>)/s', $haystack)) - 1;

        $report['solo'][$tagName] = array($opened, $closed);
        if ($opened) {
            $report['counts'][$tagName] = $opened;
        }
        if ($closed) {
            $report['warnings'][$tagName] = 'There were end-tags found ('.$closed.') for the element "'.$tagName.'". This was not expected (although XHTML technically allows it).';
        }
    }

    return $report;
}
00669
00670
00671
00672
00673
00674
00675
00676
00677
00678
00679
00680
00681
00682
00683
00684
00685
00686
00687
00688
00689
00690
00691
00692
00693
00694
00695
00696
00697
00698
00699
00700
00701
00702
00703
00704
00705
00706
00707
00708
00709
00710
00711
00712
00713
00714
00715
00716
00717
00718
00719
00720
00721
00722
/**
 * Cleans HTML content tag by tag according to a configuration array.
 *
 * The content is split at every "<". Text is passed through processContent(),
 * emitted tags through processTag() (both defined elsewhere in this class).
 * HTML comments ("<!-- ... -->") are copied to the output unchanged.
 *
 * @param string $content HTML content to clean
 * @param array $tags Configuration keyed by lower-case tag name. A tag that is not
 *        listed is removed unless $keepAll is set. A non-array value keeps the tag
 *        as it is. An array value may contain the keys read below:
 *        'overrideAttribs', 'allowedAttribs', 'fixAttrib', 'protect', 'remap',
 *        'rmTagIfNoAttrib' and 'nesting' (any true value, or 'global').
 * @param mixed $keepAll If set, tags not listed in $tags are kept. The string
 *        'protect' keeps them with "<" and ">" written as "&lt;" and "&gt;".
 * @param integer $hSC Passed on to processContent()
 * @param array $addConfig Passed on to processContent() and processTag()
 * @return string Cleaned content
 */
function HTMLcleaner($content, $tags=array(),$keepAll=0,$hSC=0,$addConfig=array()) {
    $newContent = array();
    $tokArr = explode('<',$content);
    // Everything before the first "<" is plain content
    $newContent[] = $this->processContent($tokArr[0],$hSC,$addConfig);

    $c = 1;
    $tagRegister = array();
    $tagStack = array();
    $inComment = false; $skipTag = false;

    // each() was removed in PHP 8; the first token was handled above.
    foreach (array_slice($tokArr, 1) as $tok) {
        if ($inComment) {
            if (($eocPos = strpos($tok, '-->')) === false) {
                // End of comment not found in this token, keep it verbatim
                $newContent[$c++] = '<' . $tok;
                continue;
            }
            // Comment ends in the middle of the token: copy the comment part, process the rest as content
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 3);
            $tok = substr($tok, $eocPos + 3);
            $inComment = false; $skipTag = true;
        }
        elseif (substr($tok, 0, 3) == '!--') {
            if (($eocPos = strpos($tok, '-->')) === false) {
                // Comment started in this token but does not end in it
                $newContent[$c++] = '<' . $tok;
                $inComment = true;
                continue;
            }
            // Comment starts and ends in this token
            $newContent[$c++] = '<' . substr($tok, 0, $eocPos + 3);
            $tok = substr($tok, $eocPos + 3);
            $skipTag = true;
        }
        $firstChar = substr($tok,0,1);
        // A tag is assumed if the "<" is followed by an alphanumeric character or "/"
        if (!$skipTag && preg_match('/[[:alnum:]\/]/',$firstChar)==1) {
            $tagEnd = strpos($tok,'>');
            if ($tagEnd) {
                $endTag = $firstChar=='/' ? 1 : 0;
                $tagContent = substr($tok,$endTag,$tagEnd-$endTag);
                $tagParts = preg_split('/\s+/s',$tagContent,2);
                $tagName = strtolower($tagParts[0]);
                if (isset($tags[$tagName])) {
                    if (is_array($tags[$tagName])) {
                        // There is processing configured for the tag
                        if (!$endTag) {
                            // Override attributes
                            if (strcmp($tags[$tagName]['overrideAttribs'],'')) {
                                $tagParts[1]=$tags[$tagName]['overrideAttribs'];
                            }

                            // Allowed attributes
                            if (strcmp($tags[$tagName]['allowedAttribs'],'')) {
                                if (!strcmp($tags[$tagName]['allowedAttribs'],'0')) {
                                    // No attributes allowed at all
                                    $tagParts[1]='';
                                } elseif (trim($tagParts[1])) {
                                    $tagAttrib = $this->get_tag_attributes($tagParts[1]);
                                    $tagParts[1]='';
                                    $newTagAttrib = array();
                                    if (!($tList = $tags[$tagName]['_allowedAttribs'])) {
                                        // Keep the exploded list so it is not computed again for this tag name
                                        $tList = $tags[$tagName]['_allowedAttribs'] = t3lib_div::trimExplode(',',strtolower($tags[$tagName]['allowedAttribs']),1);
                                    }
                                    foreach ($tList as $allowTag) {
                                        if (isset($tagAttrib[0][$allowTag])) $newTagAttrib[$allowTag]=$tagAttrib[0][$allowTag];
                                    }
                                    $tagParts[1]=$this->compileTagAttribs($newTagAttrib,$tagAttrib[1]);
                                }
                            }

                            // Fixed attribute values / attribute processing
                            if (is_array($tags[$tagName]['fixAttrib'])) {
                                $tagAttrib = $this->get_tag_attributes($tagParts[1]);
                                $tagParts[1]='';
                                foreach ($tags[$tagName]['fixAttrib'] as $attr => $params) {
                                    if (strlen($params['set'])) $tagAttrib[0][$attr] = $params['set'];
                                    if (strlen($params['unset'])) unset($tagAttrib[0][$attr]);
                                    if (strcmp($params['default'],'') && !isset($tagAttrib[0][$attr])) $tagAttrib[0][$attr]=$params['default'];
                                    if ($params['always'] || isset($tagAttrib[0][$attr])) {
                                        if ($params['trim']) {$tagAttrib[0][$attr]=trim($tagAttrib[0][$attr]);}
                                        if ($params['intval']) {$tagAttrib[0][$attr]=intval($tagAttrib[0][$attr]);}
                                        if ($params['lower']) {$tagAttrib[0][$attr]=strtolower($tagAttrib[0][$attr]);}
                                        if ($params['upper']) {$tagAttrib[0][$attr]=strtoupper($tagAttrib[0][$attr]);}
                                        if ($params['range']) {
                                            if (isset($params['range'][1])) {
                                                $tagAttrib[0][$attr]=t3lib_div::intInRange($tagAttrib[0][$attr],intval($params['range'][0]),intval($params['range'][1]));
                                            } else {
                                                $tagAttrib[0][$attr]=t3lib_div::intInRange($tagAttrib[0][$attr],intval($params['range'][0]));
                                            }
                                        }
                                        if (is_array($params['list'])) {
                                            // For the class attribute every single class is checked against the list
                                            if ($attr == 'class') {
                                                $newClasses = array();
                                                $classes = t3lib_div::trimExplode(' ', $tagAttrib[0][$attr], TRUE);
                                                foreach ($classes as $class) {
                                                    if (in_array($class, $params['list'])) {
                                                        $newClasses[] = $class;
                                                    }
                                                }
                                                if (count($newClasses)) {
                                                    $tagAttrib[0][$attr] = implode(' ', $newClasses);
                                                } else {
                                                    $tagAttrib[0][$attr] = '';
                                                }
                                            } else {
                                                // Other attributes fall back to the first list entry if the value is not listed
                                                if (!in_array($this->caseShift($tagAttrib[0][$attr],$params['casesensitiveComp']),$this->caseShift($params['list'],$params['casesensitiveComp'],$tagName))) {
                                                    $tagAttrib[0][$attr]=$params['list'][0];
                                                }
                                            }
                                        }
                                        if (($params['removeIfFalse'] && $params['removeIfFalse']!='blank' && !$tagAttrib[0][$attr]) || ($params['removeIfFalse']=='blank' && !strcmp($tagAttrib[0][$attr],''))) {
                                            unset($tagAttrib[0][$attr]);
                                        }
                                        if (strcmp($params['removeIfEquals'],'') && !strcmp($this->caseShift($tagAttrib[0][$attr],$params['casesensitiveComp']),$this->caseShift($params['removeIfEquals'],$params['casesensitiveComp']))) {
                                            unset($tagAttrib[0][$attr]);
                                        }
                                        if ($params['prefixLocalAnchors']) {
                                            if (substr($tagAttrib[0][$attr],0,1)=='#') {
                                                $prefix = t3lib_div::getIndpEnv('TYPO3_REQUEST_URL');
                                                $tagAttrib[0][$attr] = $prefix.$tagAttrib[0][$attr];
                                                if ($params['prefixLocalAnchors']==2 && t3lib_div::isFirstPartOfStr($prefix,t3lib_div::getIndpEnv('TYPO3_SITE_URL'))) {
                                                    $tagAttrib[0][$attr] = substr($tagAttrib[0][$attr],strlen(t3lib_div::getIndpEnv('TYPO3_SITE_URL')));
                                                }
                                            }
                                        }
                                        if ($params['prefixRelPathWith']) {
                                            $urlParts = parse_url($tagAttrib[0][$attr]);
                                            if (!$urlParts['scheme'] && substr($urlParts['path'],0,1)!='/') {
                                                $tagAttrib[0][$attr] = $params['prefixRelPathWith'].$tagAttrib[0][$attr];
                                            }
                                        }
                                        if ($params['userFunc']) {
                                            $tagAttrib[0][$attr] = t3lib_div::callUserFunction($params['userFunc'],$tagAttrib[0][$attr],$this);
                                        }
                                    }
                                }
                                $tagParts[1]=$this->compileTagAttribs($tagAttrib[0],$tagAttrib[1]);
                            }
                        } else {
                            // End tags never carry attributes
                            $tagParts[1]='';
                        }

                        // Protecting the tag means writing its brackets as entities.
                        // The flag handed to processTag() says whether that happened.
                        $isProtected = $tags[$tagName]['protect'] ? true : false;
                        if ($isProtected) {
                            $lt = '&lt;'; $gt = '&gt;';
                        } else {
                            $lt = '<'; $gt = '>';
                        }

                        // Remapping the tag name?
                        if ($tags[$tagName]['remap']) $tagParts[0] = $tags[$tagName]['remap'];

                        // rmTagIfNoAttrib: start tags without attributes are dropped if configured
                        if ($endTag || trim($tagParts[1]) || !$tags[$tagName]['rmTagIfNoAttrib']) {
                            $setTag=1;

                            if ($tags[$tagName]['nesting']) {
                                if (!is_array($tagRegister[$tagName])) $tagRegister[$tagName]=array();

                                if ($endTag) {
                                    $correctTag=1;
                                    if ($tags[$tagName]['nesting']=='global') {
                                        $lastEl = end($tagStack);
                                        if (strcmp($tagName,$lastEl)) {
                                            if (in_array($tagName,$tagStack)) {
                                                // Remove the unclosed start tags that were opened after the matching one
                                                while(count($tagStack) && strcmp($tagName,$lastEl)) {
                                                    $elPos = end($tagRegister[$lastEl]);
                                                    unset($newContent[$elPos]);

                                                    array_pop($tagRegister[$lastEl]);
                                                    array_pop($tagStack);
                                                    $lastEl = end($tagStack);
                                                }
                                            } else {
                                                // End tag that was never opened
                                                $correctTag=0;
                                            }
                                        }
                                    }
                                    if (!count($tagRegister[$tagName]) || !$correctTag) {
                                        $setTag=0;
                                    } else {
                                        array_pop($tagRegister[$tagName]);
                                        if ($tags[$tagName]['nesting']=='global') {array_pop($tagStack);}
                                    }
                                } else {
                                    // Remember the output position of the start tag
                                    array_push($tagRegister[$tagName],$c);
                                    if ($tags[$tagName]['nesting']=='global') {array_push($tagStack,$tagName);}
                                }
                            }

                            if ($setTag) {
                                // Setting the tag
                                $newContent[$c++]=$this->processTag($lt.($endTag?'/':'').trim($tagParts[0].' '.$tagParts[1]).$gt,$addConfig,$endTag,$isProtected);
                            }
                        }
                    } else {
                        // Tag is listed without configuration: keep it as it is
                        $newContent[$c++]=$this->processTag('<'.($endTag?'/':'').$tagContent.'>',$addConfig,$endTag);
                    }
                } elseif ($keepAll) {
                    // Tag is not listed in $tags but all tags are to be kept
                    $isProtected = !strcmp($keepAll,'protect');
                    if ($isProtected) {
                        $lt = '&lt;'; $gt = '&gt;';
                    } else {
                        $lt = '<'; $gt = '>';
                    }
                    $newContent[$c++]=$this->processTag($lt.($endTag?'/':'').$tagContent.$gt,$addConfig,$endTag,$isProtected);
                }
                // Content following the tag
                $newContent[$c++]=$this->processContent(substr($tok,$tagEnd+1),$hSC,$addConfig);
            } else {
                // No ">" in the token: not a complete tag, treat as content
                $newContent[$c++]=$this->processContent('<'.$tok,$hSC,$addConfig);
            }
        } else {
            // Not a tag; after a comment the "<" has already been written
            $newContent[$c++]=$this->processContent(($skipTag ? '' : '<') . $tok, $hSC, $addConfig);
            $skipTag = false;
        }
    }

    // Start tags that were registered for nesting but never closed are removed
    foreach ($tagRegister as $tag => $positions) {
        foreach ($positions as $pKey) {
            unset($newContent[$pKey]);
        }
    }

    return implode('',$newContent);
}
00954
00955
00956
00957
00958
00959
00960
00961
/**
 * Converts special characters to HTML entities or back, depending on $dir.
 *
 * @param string $value Input string
 * @param integer $dir Direction:
 *        1  = htmlspecialchars(),
 *        2  = htmlspecialchars() followed by t3lib_div::deHSCentities(),
 *        -1 = decode "&gt;", "&lt;", "&quot;" and "&amp;" back to their characters.
 *        Any other value leaves $value unchanged.
 * @return string Converted string
 */
function bidir_htmlspecialchars($value,$dir) {
    if ($dir==1) {
        $value = htmlspecialchars($value);
    } elseif ($dir==2) {
        $value = t3lib_div::deHSCentities(htmlspecialchars($value));
    } elseif ($dir==-1) {
        // Replacements are applied in list order; "&amp;" goes last so that
        // "&amp;lt;" becomes "&lt;" and is not decoded a second time.
        $value = str_replace(
            array('&gt;', '&lt;', '&quot;', '&amp;'),
            array('>', '<', '"', '&'),
            $value
        );
    }
    return $value;
}
00975
00976
00977
00978
00979
00980
00981
00982
00983
00984
/**
 * Prefixes relative resource paths in HTML: the background/src/href/action
 * attributes of selected tags, the value of <param name="movie">, and url(...)
 * references inside <style> blocks.
 *
 * @param string $main_prefix Prefix to put before relative paths
 * @param string $content HTML content
 * @param array $alternatives Alternative prefixes keyed by upper-case tag name
 *        (e.g. 'IMG'); the <style> section is looked up with the key 'style'
 * @param string $suffix String appended after each prefixed path
 * @return string Processed content
 */
function prefixResourcePath($main_prefix,$content,$alternatives=array(),$suffix='') {
    // Which attribute carries the resource path, per (lower-case) tag name
    $pathAttributeByTag = array(
        'td' => 'background',
        'body' => 'background',
        'table' => 'background',
        'img' => 'src',
        'input' => 'src',
        'script' => 'src',
        'embed' => 'src',
        'link' => 'href',
        'a' => 'href',
        'form' => 'action',
    );

    $parts = $this->splitTags('embed,td,table,body,img,input,form,link,script,a,param',$content);
    foreach ($parts as $k => $v) {
        if ($k%2) {
            // Odd keys hold the tags
            $params = $this->get_tag_attributes($v);
            $tagEnd = substr($v,-2)=='/>' ? ' />' : '>'; // Detect tag-ending so that it is re-applied correctly.
            $firstTagName = $this->getFirstTagName($v); // The 'name' of the first tag
            $tagNameLower = strtolower($firstTagName);
            $prefix = isset($alternatives[strtoupper($firstTagName)]) ? $alternatives[strtoupper($firstTagName)] : $main_prefix;

            $pathAttribute = '';
            if (isset($pathAttributeByTag[$tagNameLower])) {
                $pathAttribute = $pathAttributeByTag[$tagNameLower];
            } elseif ($tagNameLower === 'param' && isset($params[0]['name']) && $params[0]['name'] === 'movie') {
                // <param> only carries a path when it is the "movie" parameter
                $pathAttribute = 'value';
            }

            // empty() also covers a missing attribute without reading an undefined index
            if ($pathAttribute !== '' && !empty($params[0][$pathAttribute])) {
                $params[0][$pathAttribute] = $this->prefixRelPath($prefix, $params[0][$pathAttribute], $suffix);
                // Rebuild the tag from its (changed) attributes
                $parts[$k] = '<'.trim($tagNameLower.' '.$this->compileTagAttribs($params[0],$params[1])).$tagEnd;
            }
        }
    }
    $content = implode('',$parts);

    // Fix <style> section:
    $prefix = isset($alternatives['style']) ? $alternatives['style'] : $main_prefix;
    if (strlen($prefix)) {
        // ${n} keeps the group references unambiguous when the prefix starts with a
        // digit; "\" and "$" in prefix/suffix are escaped so they are inserted literally.
        $replacement = '${1}' . addcslashes($prefix, '\\$') . '${2}' . addcslashes($suffix, '\\$') . '${3}';
        $parts = $this->splitIntoBlock('style',$content);
        foreach($parts as $k => $v) {
            if ($k%2) {
                $parts[$k] = preg_replace('/(url[[:space:]]*\([[:space:]]*["\']?)([^"\')]*)(["\']?[[:space:]]*\))/i',$replacement,$parts[$k]);
            }
        }
        $content = implode('',$parts);
    }

    return $content;
}
01067
01068
01069
01070
01071
01072
01073
01074
01075
01076
/**
 * Puts $prefix before (and $suffix after) $srcVal if it is a relative path.
 * Values starting with "/" or "#" and values that have a URL scheme are
 * returned unchanged.
 *
 * @param string $prefix Prefix for relative paths
 * @param string $srcVal Path or URL to check
 * @param string $suffix Suffix appended together with the prefix
 * @return string The processed value
 */
function prefixRelPath($prefix, $srcVal, $suffix = '') {
    $firstChar = substr($srcVal, 0, 1);

    if ($firstChar != '/' && $firstChar != '#') {
        $urlParts = parse_url($srcVal);

        // parse_url() omits 'scheme' for relative paths and returns false for
        // malformed URLs; empty() covers both without reading an undefined offset.
        if (empty($urlParts['scheme'])) {
            $srcVal = $prefix . $srcVal . $suffix;
        }
    }
    return $srcVal;
}
01089
01090
01091
01092
01093
01094
01095
01096
01097
01098
01099
/**
 * Rewrites <font> tags so that only the selected attributes remain. A <font>
 * tag left without attributes is removed, keeping its content. Nested <font>
 * tags are processed recursively.
 *
 * @param string $value HTML content
 * @param boolean $keepFace If set, a non-empty "face" attribute is kept
 * @param boolean $keepSize If set, a non-empty "size" attribute is kept
 * @param boolean $keepColor If set, a non-empty "color" attribute is kept
 * @return string Processed content
 */
function cleanFontTags($value,$keepFace=0,$keepSize=0,$keepColor=0) {
    $pieces = $this->splitIntoBlock('font',$value);
    // Attribute name => whether it should survive, in output order
    $keepFlags = array(
        'face' => $keepFace,
        'size' => $keepSize,
        'color' => $keepColor,
    );

    foreach ($pieces as $idx => $piece) {
        if (!($idx%2)) {
            // Even index: content outside <font> blocks stays untouched
            continue;
        }

        $attribArray = $this->get_tag_attributes_classic($this->getFirstTag($piece));
        $keptAttribs = array();
        foreach ($keepFlags as $attribName => $keep) {
            if ($keep && $attribArray[$attribName]) {
                $keptAttribs[] = $attribName.'="'.$attribArray[$attribName].'"';
            }
        }

        $innerContent = $this->cleanFontTags($this->removeFirstAndLastTag($piece),$keepFace,$keepSize,$keepColor);

        $pieces[$idx] = count($keptAttribs)
            ? '<font '.implode(' ',$keptAttribs).'>'.$innerContent.'</font>'
            : $innerContent;
    }

    return implode('',$pieces);
}
01120
01121
01122
01123
01124
01125
01126
01127
01128
01129
/**
 * Renames tags: every tag named as a key of $tags is rewritten to the
 * corresponding value, keeping a leading "/" and the attributes.
 *
 * NOTE(review): the pattern requires whitespace after the tag name, so tags
 * such as "<b>" or "</b>" written without any whitespace are not matched.
 * This is kept as found; confirm whether callers rely on it.
 *
 * @param string $value HTML content
 * @param array $tags Map of tag name to look for => new tag name. The keys are
 *        inserted into the pattern as they are.
 * @param string $ltChar Character(s) that open a tag in the input
 * @param string $ltChar2 Character(s) written in place of $ltChar in the output
 * @return string Processed content
 */
function mapTags($value,$tags=array(),$ltChar='<',$ltChar2='<') {
    foreach($tags as $from => $to) {
        // The "*" sits inside the group so the whole attribute string is captured.
        // ${n} references keep the group number apart from a $to that starts with a digit.
        $pattern = '/'.preg_quote($ltChar, '/').'(\/)?'.$from.'\s([^\>]*)(\/)?\>/';
        $value = preg_replace($pattern, $ltChar2.'${1}'.$to.' ${2}${3}>', $value);
    }
    return $value;
}
01137
01138
01139
01140
01141
01142
01143
01144
/**
 * Reverses tag protection: tags whose brackets were written as "&lt;" and
 * "&gt;" get real "<" and ">" brackets again.
 *
 * @param string $content Content with protected tags
 * @param string $tagList Comma list of lower-case tag names to unprotect;
 *        if empty, every protected tag is unprotected
 * @return string Processed content
 */
function unprotectTags($content,$tagList='') {
    $tagsArray = t3lib_div::trimExplode(',',$tagList,1);
    $contentParts = explode('&lt;',$content);

    foreach ($contentParts as $k => $tok) {
        if ($k === 0) {
            // Text before the first "&lt;" is left alone
            continue;
        }

        // Default: put the entity back unchanged
        $restored = '&lt;'.$tok;

        $firstChar = substr($tok,0,1);
        if (strcmp(trim($firstChar),'')) {
            // Something other than whitespace follows the "&lt;", so this may be a tag
            $subparts = explode('&gt;',$tok,2);
            $tagEnd = strlen($subparts[0]);
            if (strlen($tok)!=$tagEnd) {
                // A closing "&gt;" was found
                $endTag = $firstChar=='/' ? 1 : 0;
                $tagContent = substr($tok,$endTag,$tagEnd-$endTag);
                $tagParts = preg_split('/\s+/s',$tagContent,2);
                $tagName = strtolower($tagParts[0]);
                if (!strcmp($tagList,'') || in_array($tagName,$tagsArray)) {
                    $restored = '<'.$subparts[0].'>'.$subparts[1];
                }
            }
        }

        $contentParts[$k] = $restored;
    }

    return implode('',$contentParts);
}
01168
01169
01170
01171
01172
01173
01174
01175
01176
01177
/**
 * Strips all tags from $value except those in $tagList.
 *
 * The tags to keep are first renamed with mapTags() to "_" followed by the
 * md5 hash of the tag name, so that strip_tags() does not see them, and are
 * mapped back afterwards.
 *
 * @param string $value HTML content
 * @param string $tagList Comma list of tag names to keep
 * @return string Processed content
 */
function stripTagsExcept($value,$tagList) {
    // tag name => md5 hash
    $forthArr = array();
    foreach (t3lib_div::trimExplode(',',$tagList,1) as $allowedTag) {
        $forthArr[$allowedTag] = md5($allowedTag);
    }
    // md5 hash => tag name
    $backArr = array_flip($forthArr);

    $hidden = $this->mapTags($value,$forthArr,'<','_');
    $stripped = strip_tags($hidden);

    return $this->mapTags($stripped,$backArr,'_','<');
}
01191
01192
01193
01194
01195
01196
01197
01198
01199
01200
/**
 * Converts a string, or every value of an array, to upper case unless $flag is set.
 *
 * For arrays the result can be cached in $this->caseShift_cache. The cache is
 * keyed by $cacheKey plus "0" or "1" for $flag only, not by the array content,
 * so a later call with the same key returns the array stored by the first call.
 *
 * @param mixed $str String or array of strings
 * @param boolean $flag If set, the input is returned unchanged
 * @param string $cacheKey Cache key for array input; no caching if empty
 * @return mixed The converted string or array
 */
function caseShift($str,$flag,$cacheKey='') {
    // Decide on caching before the flag digit is appended: otherwise an empty
    // key with $flag set becomes "1" and unrelated arrays share one cache slot.
    $useCache = ((string)$cacheKey !== '');
    $cacheKey .= $flag?1:0;

    if (is_array($str)) {
        if ($useCache && isset($this->caseShift_cache[$cacheKey])) {
            return $this->caseShift_cache[$cacheKey];
        }

        if (!$flag) {
            foreach ($str as $k => $v) {
                $str[$k] = strtoupper($v);
            }
        }

        if ($useCache) {
            $this->caseShift_cache[$cacheKey] = $str;
        }
    } elseif (!$flag) {
        $str = strtoupper($str);
    }

    return $str;
}
01218
01219
01220
01221
01222
01223
01224
01225
01226
01227
/**
 * Builds an attribute string from an attribute array.
 *
 * @param array $tagAttrib Map of attribute name => value
 * @param array $meta Meta data per attribute name with the optional keys
 *        'origTag' (name as originally written) and 'dashType' (quote character)
 * @param boolean $xhtmlClean If set, names are lower-cased and values are written
 *        in double quotes after htmlspecialchars(). Otherwise the original name and
 *        quote character are used where known; values without a known quote
 *        character are double-quoted unless t3lib_div::testInt() accepts them.
 * @return string Attributes separated by single spaces
 */
function compileTagAttribs($tagAttrib,$meta=array(), $xhtmlClean=0) {
    $compiled = array();

    foreach ($tagAttrib as $attribName => $attribValue) {
        // A value is written if it is non-empty or if the source had one (dashType set)
        $writeValue = strcmp($attribValue,'') || isset($meta[$attribName]['dashType']);

        if ($xhtmlClean) {
            $attr = strtolower($attribName);
            if ($writeValue) {
                $attr .= '="'.htmlspecialchars($attribValue).'"';
            }
        } else {
            $attr = $meta[$attribName]['origTag'] ? $meta[$attribName]['origTag'] : $attribName;
            if ($writeValue) {
                if ($meta[$attribName]['dashType']) {
                    $dash = $meta[$attribName]['dashType'];
                } else {
                    $dash = t3lib_div::testInt($attribValue) ? '' : '"';
                }
                $attr .= '='.$dash.$attribValue.$dash;
            }
        }

        $compiled[] = $attr;
    }

    return implode(' ',$compiled);
}
01247
01248
01249
01250
01251
01252
01253
01254
01255
/**
 * Returns only the attribute name => value map of get_tag_attributes(),
 * without the meta information.
 *
 * @param string $tag Tag, or just its attribute list
 * @param boolean $deHSC Passed on to get_tag_attributes()
 * @return array Map of attribute name => value; empty array if none could be parsed
 */
function get_tag_attributes_classic($tag,$deHSC=0) {
    $parsed = $this->get_tag_attributes($tag,$deHSC);

    if (is_array($parsed[0])) {
        return $parsed[0];
    }
    return array();
}
01260
01261
01262
01263
01264
01265
01266
01267
01268
/**
 * Indents every line of $content. CR characters are removed and the lines are
 * joined with LF again.
 *
 * @param string $content Content to indent
 * @param integer $number Number of indent strings to put before each line
 * @param string $indentChar String used for one level of indentation
 * @return string Indented content
 */
function indentLines($content, $number=1, $indentChar=TAB) {
    $preTab = str_pad('', $number*strlen($indentChar), $indentChar);
    $lines = explode(LF,str_replace(CR,'',$content));

    // explode() always yields at least one line, so prefixing the first line and
    // joining the rest with LF plus the prefix indents every line.
    return $preTab.implode(LF.$preTab, $lines);
}
01277
01278
01279
01280
01281
01282
01283
01284
01285
/**
 * Converts a TSconfig-style HTMLparser configuration array into a four-element
 * array: the tag configuration, the "keepNonMatchedTags" value, the
 * "htmlSpecialChars" value and an additional-configuration array.
 * (Presumably these are the arguments expected by HTMLcleaner() - confirm against that method.)
 *
 * Keys of $TSconfig read here: allowTags, tags., localNesting, globalNesting,
 * rmTagIfNoAttrib, noAttrib, removeTags, xhtml_cleaning, keepNonMatchedTags,
 * htmlSpecialChars. All of them are optional.
 *
 * @param	array		TSconfig array with the keys listed above
 * @param	array		Tag configuration to start from; entries here override the plain "allowTags" entries
 * @return	array		array(0 => tag configuration, 1 => keepNonMatchedTags as string, 2 => htmlSpecialChars as integer, 3 => additional configuration, with 'xhtml' => 1 when xhtml_cleaning is set)
 */
function HTMLparserConfig($TSconfig,$keepTags=array()) {
		// Allow tags (base list, merged with incoming array)
	$allowTagList = isset($TSconfig['allowTags']) ? strtolower($TSconfig['allowTags']) : '';
	$alTags = array_flip(t3lib_div::trimExplode(',', $allowTagList, 1));
	$keepTags = array_merge($alTags, $keepTags);

		// Set config properties.
	if (isset($TSconfig['tags.']) && is_array($TSconfig['tags.'])) {
			// First pass - scalar entries: "0" removes a tag, "1" adds it if not yet present.
			// Keys that are not entirely lowercase are ignored.
		foreach ($TSconfig['tags.'] as $key => $tagC) {
			if (is_array($tagC) || (string)$key !== strtolower((string)$key)) {
				continue;
			}
			if ((string)$tagC === '0') {
				unset($keepTags[$key]);
			}
			if ((string)$tagC === '1' && !isset($keepTags[$key])) {
				$keepTags[$key] = 1;
			}
		}

			// Second pass - array entries ("tagname."): merged into the tag's configuration.
			// Done after the scalar pass so that a "tagname = 0" entry cannot remove them again.
		foreach ($TSconfig['tags.'] as $key => $tagC) {
			if (!is_array($tagC) || (string)$key !== strtolower((string)$key)) {
				continue;
			}
				// Strip the trailing dot of the TSconfig key
			$key = substr((string)$key, 0, -1);
			if (!isset($keepTags[$key]) || !is_array($keepTags[$key])) {
				$keepTags[$key] = array();
			}
			if (isset($tagC['fixAttrib.']) && is_array($tagC['fixAttrib.'])) {
				foreach ($tagC['fixAttrib.'] as $atName => $atConfig) {
					if (!is_array($atConfig)) {
						continue;
					}
					$atName = substr((string)$atName, 0, -1);
					$existing = (isset($keepTags[$key]['fixAttrib'][$atName]) && is_array($keepTags[$key]['fixAttrib'][$atName]))
						? $keepTags[$key]['fixAttrib'][$atName]
						: array();
						// Candidate for t3lib_div::array_merge() if integer-keys will some day make trouble...
					$merged = array_merge($existing, $atConfig);
						// "range" and "list" are turned from comma lists into arrays. Values that
						// already are arrays (e.g. from the incoming $keepTags) are left untouched.
					foreach (array('range', 'list') as $listProperty) {
						if (isset($merged[$listProperty]) && is_scalar($merged[$listProperty]) && (string)$merged[$listProperty] !== '') {
							$merged[$listProperty] = t3lib_div::trimExplode(',', $merged[$listProperty]);
						}
					}
					$keepTags[$key]['fixAttrib'][$atName] = $merged;
				}
			}
				// fixAttrib was handled above and must not overwrite the merged result
			unset($tagC['fixAttrib.'], $tagC['fixAttrib']);
				// Candidate for t3lib_div::array_merge() if integer-keys will some day make trouble...
			$keepTags[$key] = array_merge($keepTags[$key], $tagC);
		}
	}

		// Options holding a comma list of tag names; each sets one property on tags that
		// are already part of $keepTags. Order matters: globalNesting overrides localNesting.
	$tagListOptions = array(
		array('localNesting', 'nesting', 1),
		array('globalNesting', 'nesting', 'global'),
		array('rmTagIfNoAttrib', 'rmTagIfNoAttrib', 1),
		array('noAttrib', 'allowedAttribs', 0),
	);
	foreach ($tagListOptions as $option) {
		list($optionName, $property, $propertyValue) = $option;
		if (empty($TSconfig[$optionName])) {
			continue;
		}
		$lN = t3lib_div::trimExplode(',', strtolower($TSconfig[$optionName]), 1);
		foreach ($lN as $tn) {
			if (!isset($keepTags[$tn])) {
				continue;
			}
				// Tags coming from "allowTags" only carry a scalar; turn it into an array
				// before a property is written to it.
			if (!is_array($keepTags[$tn])) {
				$keepTags[$tn] = array();
			}
			$keepTags[$tn][$property] = $propertyValue;
		}
	}

		// removeTags: the configuration of these tags is replaced (or created) unconditionally
	if (!empty($TSconfig['removeTags'])) {
		$lN = t3lib_div::trimExplode(',', strtolower($TSconfig['removeTags']), 1);
		foreach ($lN as $tn) {
			$keepTags[$tn] = array(
				'allowedAttribs' => 0,
				'rmTagIfNoAttrib' => 1,
			);
		}
	}

		// Create additional configuration:
	$addConfig = array();
	if (!empty($TSconfig['xhtml_cleaning'])) {
		$addConfig['xhtml'] = 1;
	}

	return array(
		$keepTags,
		isset($TSconfig['keepNonMatchedTags']) ? (string)$TSconfig['keepNonMatchedTags'] : '',
		isset($TSconfig['htmlSpecialChars']) ? intval($TSconfig['htmlSpecialChars']) : 0,
		$addConfig
	);
}
01381
01382
01383
01384
01385
01386
01387
01388
01389
01390
01391
01392
01393
01394
01395
01396
01397
01398
01399
01400
01401
01402
01403
01404
01405
01406
/**
 * Runs $content through HTMLcleaner() with the 'xhtml' flag set in the
 * additional configuration and returns the result.
 *
 * @param	string		Content to clean up
 * @return	string		Return value of HTMLcleaner()
 * @see HTMLcleaner()
 */
function XHTML_clean($content) {
		// No tags treated specially
	$tags = array();
		// Keep ALL tags.
	$keepAll = 1;
		// All content is htmlspecialchar()'ed (or ??) - if we do, <script> content will break...
	$hSC = 0;
	$addConfig = array('xhtml' => 1);

	return $this->HTMLcleaner($content, $tags, $keepAll, $hSC, $addConfig);
}
01417
01418
01419
01420
01421
01422
01423
01424
01425
01426
01427
01428
/**
 * Post-processes a single tag. The tag is only modified when $conf['xhtml'] is set:
 * end tags are lowercased, comments are left alone and all other tags are rebuilt
 * with a lowercase tag name and double-quoted attribute values.
 * STILL VERY EXPERIMENTAL!!
 *
 * @param	string		The complete tag, including < and >
 * @param	array		Configuration array; only the key 'xhtml' is evaluated here
 * @param	boolean		If set, $value is treated as an end tag
 * @param	boolean		If set, $value is returned untouched
 * @return	string		The processed tag
 */
function processTag($value,$conf,$endTag,$protected=0) {
		// Return immediately if protected or no parameters
	if ($protected || empty($conf)) {
		return $value;
	}
		// Only XHTML processing is implemented
	if (empty($conf['xhtml'])) {
		return $value;
	}
		// Endtags are just set lowercase right away
	if ($endTag) {
		return strtolower($value);
	}
		// ... and comments are ignored.
	if (substr($value, 0, 4) == '<!--') {
		return $value;
	}

		// Finding inner value with out < > (or < /> for self-closing tags)
	$isSelfClosed = (substr($value, -2) == '/>');
	$inValue = substr($value, 1, ($isSelfClosed ? -2 : -1));

		// Separate attributes and tagname. A tag without attributes yields only one
		// part, so the attribute string falls back to an empty string.
	$parts = preg_split('/\s+/s', $inValue, 2);
	$tagName = strtolower(isset($parts[0]) ? $parts[0] : '');
	$tagP = isset($parts[1]) ? $parts[1] : '';

		// Process attributes; only element 0 of the get_tag_attributes() result is used
	$tagAttrib = $this->get_tag_attributes($tagP);
	$attributes = (isset($tagAttrib[0]) && is_array($tagAttrib[0])) ? $tagAttrib[0] : array();
		// Set alt attribute for all images (not XHTML though...)
	if ($tagName === 'img' && !isset($attributes['alt'])) {
		$attributes['alt'] = '';
	}
		// Set type attribute for all script-tags
	if ($tagName === 'script' && !isset($attributes['type'])) {
		$attributes['type'] = 'text/javascript';
	}

	$outA = array();
	foreach ($attributes as $attrib_name => $attrib_value) {
			// Attribute names are used as returned by get_tag_attributes() (presumably already
			// lowercase - confirm); values are always put in double quotes after being passed
			// through bidir_htmlspecialchars() with direction 2.
		$outA[] = $attrib_name . '="' . $this->bidir_htmlspecialchars($attrib_value, 2) . '"';
	}
	$newTag = '<' . trim($tagName . ' ' . implode(' ', $outA));

		// Tags from this list, and tags that were self-closed in the input, are closed with " />"
	if (t3lib_div::inList('img,br,hr,meta,link,base,area,input,param,col', $tagName) || $isSelfClosed) {
		$newTag .= ' />';
	} else {
		$newTag .= '>';
	}

	return $newTag;
}
01464
01465
01466
01467
01468
01469
01470
01471
01472
01473
/**
 * Processes content between tags: unless $dir equals 0, the value is passed
 * through bidir_htmlspecialchars() with $dir as direction.
 *
 * @param	string		The content to process
 * @param	integer		Direction passed to bidir_htmlspecialchars(); 0 leaves the content untouched
 * @param	mixed		Not used by this implementation
 * @return	string		The processed content
 * @see bidir_htmlspecialchars()
 */
function processContent($value,$dir,$conf) {
		// Nothing to convert
	if ($dir == 0) {
		return $value;
	}

	return $this->bidir_htmlspecialchars($value, $dir);
}
01478 }
01479
01480
01481
01482 if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml.php']) {
01483 include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml.php']);
01484 }
01485
01486 ?>