TYPO3 API  SVNRelease
class.t3lib_parsehtml_proc.php
Go to the documentation of this file.
00001 <?php
00002 /***************************************************************
00003  *  Copyright notice
00004  *
00005  *  (c) 1999-2011 Kasper Skårhøj (kasperYYYY@typo3.com)
00006  *  All rights reserved
00007  *
00008  *  This script is part of the TYPO3 project. The TYPO3 project is
00009  *  free software; you can redistribute it and/or modify
00010  *  it under the terms of the GNU General Public License as published by
00011  *  the Free Software Foundation; either version 2 of the License, or
00012  *  (at your option) any later version.
00013  *
00014  *  The GNU General Public License can be found at
00015  *  http://www.gnu.org/copyleft/gpl.html.
00016  *  A copy is found in the textfile GPL.txt and important notices to the license
00017  *  from the author is found in LICENSE.txt distributed with these scripts.
00018  *
00019  *
00020  *  This script is distributed in the hope that it will be useful,
00021  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00022  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00023  *  GNU General Public License for more details.
00024  *
00025  *  This copyright notice MUST APPEAR in all copies of the script!
00026  ***************************************************************/
00027 /**
00028  * Functions for parsing HTML, especially for TYPO3 processing in relation to TCEmain and Rich Text Editor (RTE)
00029  *
00030  * $Id: class.t3lib_parsehtml_proc.php 10121 2011-01-18 20:15:30Z ohader $
00031  * Revised for TYPO3 3.6 December/2003 by Kasper Skårhøj
00032  * XHTML compatible.
00033  *
00034  * @author  Kasper Skårhøj <kasperYYYY@typo3.com>
00035  * @internal
00036  */
00037 /**
00038  * [CLASS/FUNCTION INDEX of SCRIPT]
00039  *
00040  *
00041  *
00042  *  103: class t3lib_parsehtml_proc extends t3lib_parsehtml
00043  *  138:     function init($elRef='',$recPid=0)
00044  *  150:     function setRelPath($path)
00045  *  174:     function evalWriteFile($pArr,$currentRecord)
00046  *
00047  *            SECTION: Main function
00048  *  232:     function RTE_transform($value,$specConf,$direction='rte',$thisConfig=array())
00049  *
00050  *            SECTION: Specific RTE TRANSFORMATION functions
00051  *  398:     function TS_images_db($value)
00052  *  550:     function TS_images_rte($value)
00053  *  589:     function TS_reglinks($value,$direction)
00054  *  626:     function TS_links_db($value)
00055  *  675:     function TS_links_rte($value)
00056  *  760:     function TS_preserve_db($value)
00057  *  784:     function TS_preserve_rte($value)
00058  *  805:     function TS_transform_db($value,$css=FALSE)
00059  *  922:     function transformStyledATags($value)
00060  *  948:     function TS_transform_rte($value,$css=0)
00061  * 1019:     function TS_strip_db($value)
00062  *
00063  *            SECTION: Generic RTE transformation, analysis and helper functions
00064  * 1050:     function getURL($url)
00065  * 1064:     function HTMLcleaner_db($content,$tagList='')
00066  * 1091:     function getKeepTags($direction='rte',$tagList='')
00067  * 1200:     function divideIntoLines($value,$count=5,$returnArray=FALSE)
00068  * 1304:     function setDivTags($value,$dT='p')
00069  * 1349:     function internalizeFontTags($value)
00070  * 1385:     function siteUrl()
00071  * 1395:     function rteImageStorageDir()
00072  * 1407:     function removeTables($value,$breakChar='<br />')
00073  * 1439:     function defaultTStagMapping($code,$direction='rte')
00074  * 1462:     function getWHFromAttribs($attribArray)
00075  * 1489:     function urlInfoForLinkTags($url)
00076  * 1548:     function TS_AtagToAbs($value,$dontSetRTEKEEP=FALSE)
00077  *
00078  * TOTAL FUNCTIONS: 28
00079  * (This index is automatically created/updated by the extension "extdeveval")
00080  *
00081  */
00082 
00083 
00084 /**
00085  * Class for parsing HTML for the Rich Text Editor. (also called transformations)
00086  *
00087  * @author  Kasper Skårhøj <kasperYYYY@typo3.com>
00088  * @package TYPO3
00089  * @subpackage t3lib
00090  */
00091 class t3lib_parsehtml_proc extends t3lib_parsehtml {
00092 
00093         // Static:
00094     var $blockElementList = 'PRE,UL,OL,H1,H2,H3,H4,H5,H6,ADDRESS,DL,DD'; // List of tags for these elements
00095 
00096         // Internal, static:
00097     var $recPid = 0; // Set this to the pid of the record manipulated by the class.
00098     var $elRef = ''; // Element reference [table]:[field], eg. "tt_content:bodytext"
00099     var $relPath = ''; // Relative path
00100     var $relBackPath = ''; // Relative back-path
00101     public $tsConfig = array(); // Current Page TSConfig
00102     var $procOptions = ''; // Set to the TSconfig options coming from Page TSconfig
00103 
00104         // Internal, dynamic
00105     var $TS_transform_db_safecounter = 100; // Run-away brake for recursive calls.
00106     var $rte_p = ''; // Parameters from TCA types configuration related to the RTE
00107     var $getKeepTags_cache = array(); // Data caching for processing function
00108     var $allowedClasses = array(); // Storage of the allowed CSS class names in the RTE
00109     var $preserveTags = ''; // Set to tags to preserve from Page TSconfig configuration
00110 
00111 
00112     /**
00113      * Initialize, setting element reference and record PID
00114      *
00115      * @param   string      Element reference, eg "tt_content:bodytext"
00116      * @param   integer     PID of the record (page id)
00117      * @return  void
00118      */
00119     function init($elRef = '', $recPid = 0) {
00120         $this->recPid = $recPid;
00121         $this->elRef = $elRef;
00122     }
00123 
00124     /**
00125      * Setting the ->relPath and ->relBackPath to proper values so absolute references to links and images can be converted to relative dittos.
00126      * This is used when editing files with the RTE
00127      *
00128      * @param   string      The relative path from PATH_site to the place where the file being edited is. Eg. "fileadmin/static".
00129      * @return  void        There is no output, it is set in internal variables. With the above example of "fileadmin/static" as input this will yield ->relPath to be "fileadmin/static/" and ->relBackPath to be "../../"
00130      */
00131     function setRelPath($path) {
00132         $path = trim($path);
00133         $path = preg_replace('/^\//', '', $path);
00134         $path = preg_replace('/\/$/', '', $path);
00135         if ($path) {
00136             $this->relPath = $path;
00137             $this->relBackPath = '';
00138             $partsC = count(explode('/', $this->relPath));
00139             for ($a = 0; $a < $partsC; $a++) {
00140                 $this->relBackPath .= '../';
00141             }
00142             $this->relPath .= '/';
00143         }
00144     }
00145 
00146     /**
00147      * Evaluate the environment for editing a staticFileEdit file.
00148      * Called for almost all fields being saved in the database. Is called without an instance of the object: t3lib_parsehtml_proc::evalWriteFile()
00149      *
00150      * @param   array       Parameters for the current field as found in types-config
00151      * @param   array       Current record we are editing.
00152      * @return  mixed       On success an array with various information is returned, otherwise a string with an error message
00153      * @see t3lib_TCEmain, t3lib_transferData
00154      */
00155     function evalWriteFile($pArr, $currentRecord) {
00156 
00157             // Write file configuration:
00158         if (is_array($pArr)) {
00159             if ($GLOBALS['TYPO3_CONF_VARS']['BE']['staticFileEditPath']
00160                 && substr($GLOBALS['TYPO3_CONF_VARS']['BE']['staticFileEditPath'], -1) == '/'
00161                 && @is_dir(PATH_site . $GLOBALS['TYPO3_CONF_VARS']['BE']['staticFileEditPath'])) {
00162 
00163                 $SW_p = $pArr['parameters'];
00164                 $SW_editFileField = trim($SW_p[0]);
00165                 $SW_editFile = $currentRecord[$SW_editFileField];
00166                 if ($SW_editFileField && $SW_editFile && t3lib_div::validPathStr($SW_editFile)) {
00167                     $SW_relpath = $GLOBALS['TYPO3_CONF_VARS']['BE']['staticFileEditPath'] . $SW_editFile;
00168                     $SW_editFile = PATH_site . $SW_relpath;
00169                     if (@is_file($SW_editFile)) {
00170                         return array(
00171                             'editFile' => $SW_editFile,
00172                             'relEditFile' => $SW_relpath,
00173                             'contentField' => trim($SW_p[1]),
00174                             'markerField' => trim($SW_p[2]),
00175                             'loadFromFileField' => trim($SW_p[3]),
00176                             'statusField' => trim($SW_p[4])
00177                         );
00178                     } else {
00179                         return "ERROR: Editfile '" . $SW_relpath . "' did not exist";
00180                     }
00181                 } else {
00182                     return "ERROR: Edit file name could not be found or was bad.";
00183                 }
00184             } else {
00185                 return "ERROR: staticFileEditPath was not set, not set correctly or did not exist!";
00186             }
00187         }
00188     }
00189 
00190 
00191     /**********************************************
00192      *
00193      * Main function
00194      *
00195      **********************************************/
00196 
00197     /**
00198      * Transform value for RTE based on specConf in the direction specified by $direction (rte/db)
00199      * This is the main function called from tcemain and transfer data classes
00200      *
00201      * @param   string      Input value
00202      * @param   array       Special configuration for a field; This is coming from the types-configuration of the field in the TCA. In the types-configuration you can setup features for the field rendering and in particular the RTE takes al its major configuration options from there!
00203      * @param   string      Direction of the transformation. Two keywords are allowed; "db" or "rte". If "db" it means the transformation will clean up content coming from the Rich Text Editor and goes into the database. The other direction, "rte", is of course when content is coming from database and must be transformed to fit the RTE.
00204      * @param   array       Parsed TypoScript content configuring the RTE, probably coming from Page TSconfig.
00205      * @return  string      Output value
00206      * @see t3lib_TCEmain::fillInFieldArray(), t3lib_transferData::renderRecord_typesProc()
00207      */
00208     function RTE_transform($value, $specConf, $direction = 'rte', $thisConfig = array()) {
00209 
00210             // Init:
00211         $this->tsConfig = $thisConfig;
00212         $this->procOptions = $thisConfig['proc.'];
00213         $this->preserveTags = strtoupper(implode(',', t3lib_div::trimExplode(',', $this->procOptions['preserveTags'])));
00214 
00215             // dynamic configuration of blockElementList
00216         if ($this->procOptions['blockElementList']) {
00217             $this->blockElementList = $this->procOptions['blockElementList'];
00218         }
00219 
00220             // Get parameters for rte_transformation:
00221         $p = $this->rte_p = t3lib_BEfunc::getSpecConfParametersFromArray($specConf['rte_transform']['parameters']);
00222 
00223             // Setting modes:
00224         if (strcmp($this->procOptions['overruleMode'], '')) {
00225             $modes = array_unique(t3lib_div::trimExplode(',', $this->procOptions['overruleMode']));
00226         } else {
00227             $modes = array_unique(t3lib_div::trimExplode('-', $p['mode']));
00228         }
00229         $revmodes = array_flip($modes);
00230 
00231             // Find special modes and extract them:
00232         if (isset($revmodes['ts'])) {
00233             $modes[$revmodes['ts']] = 'ts_transform,ts_preserve,ts_images,ts_links';
00234         }
00235             // Find special modes and extract them:
00236         if (isset($revmodes['ts_css'])) {
00237             $modes[$revmodes['ts_css']] = 'css_transform,ts_images,ts_links';
00238         }
00239 
00240             // Make list unique
00241         $modes = array_unique(t3lib_div::trimExplode(',', implode(',', $modes), 1));
00242 
00243             // Reverse order if direction is "rte"
00244         if ($direction == 'rte') {
00245             $modes = array_reverse($modes);
00246         }
00247 
00248             // Getting additional HTML cleaner configuration. These are applied either before or after the main transformation is done and is thus totally independant processing options you can set up:
00249         $entry_HTMLparser = $this->procOptions['entryHTMLparser_' . $direction] ? $this->HTMLparserConfig($this->procOptions['entryHTMLparser_' . $direction . '.']) : '';
00250         $exit_HTMLparser = $this->procOptions['exitHTMLparser_' . $direction] ? $this->HTMLparserConfig($this->procOptions['exitHTMLparser_' . $direction . '.']) : '';
00251 
00252             // Line breaks of content is unified into char-10 only (removing char 13)
00253         if (!$this->procOptions['disableUnifyLineBreaks']) {
00254             $value = str_replace(CRLF, LF, $value);
00255         }
00256 
00257             // In an entry-cleaner was configured, pass value through the HTMLcleaner with that:
00258         if (is_array($entry_HTMLparser)) {
00259             $value = $this->HTMLcleaner($value, $entry_HTMLparser[0], $entry_HTMLparser[1], $entry_HTMLparser[2], $entry_HTMLparser[3]);
00260         }
00261 
00262             // Traverse modes:
00263         foreach ($modes as $cmd) {
00264                 // ->DB
00265             if ($direction == 'db') {
00266                     // Checking for user defined transformation:
00267                 if ($_classRef = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd]) {
00268                     $_procObj = t3lib_div::getUserObj($_classRef);
00269                     $_procObj->pObj = $this;
00270                     $_procObj->transformationKey = $cmd;
00271                     $value = $_procObj->transform_db($value, $this);
00272                 } else { // ... else use defaults:
00273                     switch ($cmd) {
00274                         case 'ts_images':
00275                             $value = $this->TS_images_db($value);
00276                         break;
00277                         case 'ts_reglinks':
00278                             $value = $this->TS_reglinks($value, 'db');
00279                         break;
00280                         case 'ts_links':
00281                             $value = $this->TS_links_db($value);
00282                         break;
00283                         case 'ts_preserve':
00284                             $value = $this->TS_preserve_db($value);
00285                         break;
00286                         case 'ts_transform':
00287                         case 'css_transform':
00288                             $value = str_replace(CR, '', $value); // Has a very disturbing effect, so just remove all '13' - depend on '10'
00289                             $this->allowedClasses = t3lib_div::trimExplode(',', $this->procOptions['allowedClasses'], 1);
00290                             $value = $this->TS_transform_db($value, $cmd == 'css_transform');
00291                         break;
00292                         case 'ts_strip':
00293                             $value = $this->TS_strip_db($value);
00294                         break;
00295                         default:
00296                         break;
00297                     }
00298                 }
00299             }
00300                 // ->RTE
00301             if ($direction == 'rte') {
00302                     // Checking for user defined transformation:
00303                 if ($_classRef = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['t3lib/class.t3lib_parsehtml_proc.php']['transformation'][$cmd]) {
00304                     $_procObj = t3lib_div::getUserObj($_classRef);
00305                     $_procObj->pObj = $this;
00306                     $value = $_procObj->transform_rte($value, $this);
00307                 } else { // ... else use defaults:
00308                     switch ($cmd) {
00309                         case 'ts_images':
00310                             $value = $this->TS_images_rte($value);
00311                         break;
00312                         case 'ts_reglinks':
00313                             $value = $this->TS_reglinks($value, 'rte');
00314                         break;
00315                         case 'ts_links':
00316                             $value = $this->TS_links_rte($value);
00317                         break;
00318                         case 'ts_preserve':
00319                             $value = $this->TS_preserve_rte($value);
00320                         break;
00321                         case 'ts_transform':
00322                         case 'css_transform':
00323                             $value = str_replace(CR, '', $value); // Has a very disturbing effect, so just remove all '13' - depend on '10'
00324                             $value = $this->TS_transform_rte($value, $cmd == 'css_transform');
00325                         break;
00326                         default:
00327                         break;
00328                     }
00329                 }
00330             }
00331         }
00332 
00333             // In an exit-cleaner was configured, pass value through the HTMLcleaner with that:
00334         if (is_array($exit_HTMLparser)) {
00335             $value = $this->HTMLcleaner($value, $exit_HTMLparser[0], $exit_HTMLparser[1], $exit_HTMLparser[2], $exit_HTMLparser[3]);
00336         }
00337 
00338             // Final clean up of linebreaks:
00339         if (!$this->procOptions['disableUnifyLineBreaks']) {
00340             $value = str_replace(CRLF, LF, $value); // Make sure no \r\n sequences has entered in the meantime...
00341             $value = str_replace(LF, CRLF, $value); // ... and then change all \n into \r\n
00342         }
00343 
00344             // Return value:
00345         return $value;
00346     }
00347 
00348 
00349     /************************************
00350      *
00351      * Specific RTE TRANSFORMATION functions
00352      *
00353      *************************************/
00354 
00355     /**
00356      * Transformation handler: 'ts_images' / direction: "db"
00357      * Processing images inserted in the RTE.
00358      * This is used when content goes from the RTE to the database.
00359      * Images inserted in the RTE has an absolute URL applied to the src attribute. This URL is converted to a relative URL
00360      * If it turns out that the URL is from another website than the current the image is read from that external URL and moved to the local server.
00361      * Also "magic" images are processed here.
00362      *
00363      * @param   string      The content from RTE going to Database
00364      * @return  string      Processed content
00365      */
00366     function TS_images_db($value) {
00367 
00368             // Split content by <img> tags and traverse the resulting array for processing:
00369         $imgSplit = $this->splitTags('img', $value);
00370         foreach ($imgSplit as $k => $v) {
00371             if ($k % 2) { // image found, do processing:
00372 
00373                     // Init
00374                 $attribArray = $this->get_tag_attributes_classic($v, 1);
00375                 $siteUrl = $this->siteUrl();
00376                 $sitePath = str_replace(t3lib_div::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
00377 
00378                 $absRef = trim($attribArray['src']); // It's always a absolute URL coming from the RTE into the Database.
00379 
00380                     // make path absolute if it is relative and we have a site path wich is not '/'
00381                 $pI = pathinfo($absRef);
00382                 if ($sitePath AND !$pI['scheme'] && t3lib_div::isFirstPartOfStr($absRef, $sitePath)) {
00383                         // if site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
00384                     $absRef = substr($absRef, strlen($sitePath));
00385                     $absRef = $siteUrl . $absRef;
00386                 }
00387 
00388                     // External image from another URL? In that case, fetch image (unless disabled feature).
00389                 if (!t3lib_div::isFirstPartOfStr($absRef, $siteUrl) && !$this->procOptions['dontFetchExtPictures']) {
00390                     $externalFile = $this->getUrl($absRef); // Get it
00391                     if ($externalFile) {
00392                         $pU = parse_url($absRef);
00393                         $pI = pathinfo($pU['path']);
00394 
00395                         if (t3lib_div::inList('gif,png,jpeg,jpg', strtolower($pI['extension']))) {
00396                             $filename = t3lib_div::shortMD5($absRef) . '.' . $pI['extension'];
00397                             $origFilePath = PATH_site . $this->rteImageStorageDir() . 'RTEmagicP_' . $filename;
00398                             $C_origFilePath = PATH_site . $this->rteImageStorageDir() . 'RTEmagicC_' . $filename . '.' . $pI['extension'];
00399                             if (!@is_file($origFilePath)) {
00400                                 t3lib_div::writeFile($origFilePath, $externalFile);
00401                                 t3lib_div::writeFile($C_origFilePath, $externalFile);
00402                             }
00403                             $absRef = $siteUrl . $this->rteImageStorageDir() . 'RTEmagicC_' . $filename . '.' . $pI['extension'];
00404 
00405                             $attribArray['src'] = $absRef;
00406                             $params = t3lib_div::implodeAttributes($attribArray, 1);
00407                             $imgSplit[$k] = '<img ' . $params . ' />';
00408                         }
00409                     }
00410                 }
00411 
00412                     // Check image as local file (siteURL equals the one of the image)
00413                 if (t3lib_div::isFirstPartOfStr($absRef, $siteUrl)) {
00414                     $path = rawurldecode(substr($absRef, strlen($siteUrl))); // Rel-path, rawurldecoded for special characters.
00415                     $filepath = t3lib_div::getFileAbsFileName($path); // Abs filepath, locked to relative path of this project.
00416 
00417                         // Check file existence (in relative dir to this installation!)
00418                     if ($filepath && @is_file($filepath)) {
00419 
00420                             // If "magic image":
00421                         $pathPre = $this->rteImageStorageDir() . 'RTEmagicC_';
00422                         if (t3lib_div::isFirstPartOfStr($path, $pathPre)) {
00423                                 // Find original file:
00424                             $pI = pathinfo(substr($path, strlen($pathPre)));
00425                             $filename = substr($pI['basename'], 0, -strlen('.' . $pI['extension']));
00426                             $origFilePath = PATH_site . $this->rteImageStorageDir() . 'RTEmagicP_' . $filename;
00427                             if (@is_file($origFilePath)) {
00428                                 $imgObj = t3lib_div::makeInstance('t3lib_stdGraphic');
00429                                 $imgObj->init();
00430                                 $imgObj->mayScaleUp = 0;
00431                                 $imgObj->tempPath = PATH_site . $imgObj->tempPath;
00432 
00433                                 $curInfo = $imgObj->getImageDimensions($filepath); // Image dimensions of the current image
00434                                 $curWH = $this->getWHFromAttribs($attribArray); // Image dimensions as set in the image tag
00435                                     // Compare dimensions:
00436                                 if ($curWH[0] != $curInfo[0] || $curWH[1] != $curInfo[1]) {
00437                                     $origImgInfo = $imgObj->getImageDimensions($origFilePath); // Image dimensions of the current image
00438                                     $cW = $curWH[0];
00439                                     $cH = $curWH[1];
00440                                     $cH = 1000; // Make the image based on the width solely...
00441                                     $imgI = $imgObj->imageMagickConvert($origFilePath, $pI['extension'], $cW . 'm', $cH . 'm');
00442                                     if ($imgI[3]) {
00443                                         $fI = pathinfo($imgI[3]);
00444                                         @copy($imgI[3], $filepath); // Override the child file
00445                                             // Removing width and heigth form style attribute
00446                                         $attribArray['style'] = preg_replace('/((?:^|)\s*(?:width|height)\s*:[^;]*(?:$|;))/si', '', $attribArray['style']);
00447                                         $attribArray['width'] = $imgI[0];
00448                                         $attribArray['height'] = $imgI[1];
00449                                         $params = t3lib_div::implodeAttributes($attribArray, 1);
00450                                         $imgSplit[$k] = '<img ' . $params . ' />';
00451                                     }
00452                                 }
00453                             }
00454 
00455                         } elseif ($this->procOptions['plainImageMode']) { // If "plain image" has been configured:
00456 
00457                                 // Image dimensions as set in the image tag, if any
00458                             $curWH = $this->getWHFromAttribs($attribArray);
00459                             if ($curWH[0]) {
00460                                 $attribArray['width'] = $curWH[0];
00461                             }
00462                             if ($curWH[1]) {
00463                                 $attribArray['height'] = $curWH[1];
00464                             }
00465 
00466                                 // Removing width and heigth form style attribute
00467                             $attribArray['style'] = preg_replace('/((?:^|)\s*(?:width|height)\s*:[^;]*(?:$|;))/si', '', $attribArray['style']);
00468 
00469                                 // Finding dimensions of image file:
00470                             $fI = @getimagesize($filepath);
00471 
00472                                 // Perform corrections to aspect ratio based on configuration:
00473                             switch ((string) $this->procOptions['plainImageMode']) {
00474                                 case 'lockDimensions':
00475                                     $attribArray['width'] = $fI[0];
00476                                     $attribArray['height'] = $fI[1];
00477                                 break;
00478                                 case 'lockRatioWhenSmaller': // If the ratio has to be smaller, then first set the width...:
00479                                     if ($attribArray['width'] > $fI[0]) {
00480                                         $attribArray['width'] = $fI[0];
00481                                     }
00482                                 case 'lockRatio':
00483                                     if ($fI[0] > 0) {
00484                                         $attribArray['height'] = round($attribArray['width'] * ($fI[1] / $fI[0]));
00485                                     }
00486                                 break;
00487                             }
00488 
00489                                 // Compile the image tag again:
00490                             $params = t3lib_div::implodeAttributes($attribArray, 1);
00491                             $imgSplit[$k] = '<img ' . $params . ' />';
00492                         }
00493                     } else { // Remove image if it was not found in a proper position on the server!
00494 
00495                         // Commented out; removing the image tag might not be that logical...
00496                         // $imgSplit[$k]='';
00497                     }
00498                 }
00499 
00500                     // Convert abs to rel url
00501                 if ($imgSplit[$k]) {
00502                     $attribArray = $this->get_tag_attributes_classic($imgSplit[$k], 1);
00503                     $absRef = trim($attribArray['src']);
00504                     if (t3lib_div::isFirstPartOfStr($absRef, $siteUrl)) {
00505                         $attribArray['src'] = $this->relBackPath . substr($absRef, strlen($siteUrl));
00506                         if (!isset($attribArray['alt'])) {
00507                             $attribArray['alt'] = '';
00508                         } // Must have alt-attribute for XHTML compliance.
00509                         $imgSplit[$k] = '<img ' . t3lib_div::implodeAttributes($attribArray, 1, 1) . ' />';
00510                     }
00511                 }
00512             }
00513         }
00514         return implode('', $imgSplit);
00515     }
00516 
00517     /**
00518      * Transformation handler: 'ts_images' / direction: "rte"
00519      * Processing images from database content going into the RTE.
00520      * Processing includes converting the src attribute to an absolute URL.
00521      *
00522      * @param   string      Content input
00523      * @return  string      Content output
00524      */
00525     function TS_images_rte($value) {
00526 
00527         $siteUrl = $this->siteUrl();
00528         $sitePath = str_replace(t3lib_div::getIndpEnv('TYPO3_REQUEST_HOST'), '', $siteUrl);
00529 
00530             // Split content by <img> tags and traverse the resulting array for processing:
00531         $imgSplit = $this->splitTags('img', $value);
00532         foreach ($imgSplit as $k => $v) {
00533             if ($k % 2) { // image found:
00534 
00535                     // Init
00536                 $attribArray = $this->get_tag_attributes_classic($v, 1);
00537                 $absRef = trim($attribArray['src']);
00538 
00539                     // Unless the src attribute is already pointing to an external URL:
00540                 if (strtolower(substr($absRef, 0, 4)) != 'http') {
00541                     $attribArray['src'] = substr($attribArray['src'], strlen($this->relBackPath));
00542                         // if site is in a subpath (eg. /~user_jim/) this path needs to be removed because it will be added with $siteUrl
00543                     $attribArray['src'] = preg_replace('#^' . preg_quote($sitePath, '#') . '#', '', $attribArray['src']);
00544                     $attribArray['src'] = $siteUrl . $attribArray['src'];
00545                     if (!isset($attribArray['alt'])) {
00546                         $attribArray['alt'] = '';
00547                     }
00548                     $params = t3lib_div::implodeAttributes($attribArray);
00549                     $imgSplit[$k] = '<img ' . $params . ' />';
00550                 }
00551             }
00552         }
00553 
00554             // return processed content:
00555         return implode('', $imgSplit);
00556     }
00557 
/**
 * Transformation handler: 'ts_reglinks' / direction: "db"+"rte" depending on $direction variable.
 * Rewrites the href of <A>-tags between absolute and relative form.
 *
 * For "rte" the work is delegated to TS_AtagToAbs(). For "db" every <A>-block is
 * rebuilt: if its href starts with the site URL, that prefix is replaced by
 * $this->relBackPath. The content inside each <A>-block is processed recursively.
 *
 * @param   string      Content input
 * @param   string      Direction of conversion; "rte" (from database to RTE) or "db" (from RTE to database)
 * @return  string      Content output (empty string for any other direction)
 */
function TS_reglinks($value, $direction) {
    if ($direction == 'rte') {
        return $this->TS_AtagToAbs($value, 1);
    }
    if ($direction != 'db') {
        return '';
    }

    $sitePrefix = $this->siteUrl();
    $sitePrefixLength = strlen($sitePrefix);
    $segments = $this->splitIntoBlock('A', $value);
    foreach ($segments as $index => $segment) {
            // Even positions hold the content between <A>-blocks and are left alone
        if (!($index % 2)) {
            continue;
        }
        $attributes = $this->get_tag_attributes_classic($this->getFirstTag($segment), 1);
            // A local URL loses the site prefix and gets the relative back path instead
        if ($sitePrefix && substr($attributes['href'], 0, $sitePrefixLength) == $sitePrefix) {
            $attributes['href'] = $this->relBackPath . substr($attributes['href'], $sitePrefixLength);
        }
        $openingTag = '<a ' . t3lib_div::implodeAttributes($attributes, 1) . '>';
        $innerContent = $this->TS_reglinks($this->removeFirstAndLastTag($segments[$index]), $direction);
        $segments[$index] = $openingTag . $innerContent . '</a>';
    }
    return implode('', $segments);
}
00593 
/**
 * Transformation handler: 'ts_links' / direction: "db"
 * Converting <A>-tags to <link tags>
 *
 * An <A>-tag is turned into the TYPO3 pseudo-tag <link> only when it carries no
 * attributes other than href, target, class, title and external (plus rteerror and
 * style when rteerror is set). Any other <A>-tag is stored as an <A>-tag, with the
 * "rtekeep" and "external" attributes removed and a local href made relative.
 * The content inside each <A>-block is processed recursively.
 *
 * @param   string      Content input
 * @return  string      Content output
 * @see TS_links_rte()
 */
function TS_links_db($value) {
    $segments = $this->splitIntoBlock('A', $value);
    foreach ($segments as $index => $segment) {
            // Only odd positions contain an <A>-block
        if (!($index % 2)) {
            continue;
        }

        $attributes = $this->get_tag_attributes_classic($this->getFirstTag($segment), 1);
        $info = $this->urlInfoForLinkTags($attributes['href']);

            // Find out which attributes remain once those a <link> tag can express are taken away
        $leftOver = $attributes;
        unset($leftOver['href'], $leftOver['target'], $leftOver['class'], $leftOver['title'], $leftOver['external']);
        if ($leftOver['rteerror']) {
                // The error marker and the style that came with it are never stored
            unset($leftOver['style'], $leftOver['rteerror']);
        }

        if (count($leftOver)) {
                // Additional attributes present: store the link as a-tag.
            unset($attributes['rtekeep']);
            if (!$attributes['external']) {
                    // If the url is local, remove url-prefix
                $siteURL = $this->siteUrl();
                if ($siteURL && substr($attributes['href'], 0, strlen($siteURL)) == $siteURL) {
                    $attributes['href'] = $this->relBackPath . substr($attributes['href'], strlen($siteURL));
                }
            }
            unset($attributes['external']);
            $openingTag = '<a ' . t3lib_div::implodeAttributes($attributes, 1) . '>';
            $closingTag = '</a>';
        } else {
                // Class and title values are wrapped in double quotes if they contain spaces
            $class = $attributes['class'];
            if (preg_match('/ /', $class)) {
                $class = '"' . $class . '"';
            }
            $title = $attributes['title'];
            if (preg_match('/ /', $title)) {
                $title = '"' . $title . '"';
            }
            $target = $attributes['target'];

                // If external attribute is set, keep the href unchanged
            if ($attributes['external']) {
                $href = $attributes['href'];
            } else {
                $href = $info['url'] . ($info['query'] ? ',0,' . $info['query'] : '');
            }

                // Parameters are positional, so "-" fills an empty slot that is followed by a used one
            if ($target) {
                $targetPart = ' ' . $target;
            } elseif ($class || $title) {
                $targetPart = ' -';
            } else {
                $targetPart = '';
            }
            if ($class) {
                $classPart = ' ' . $class;
            } elseif ($title) {
                $classPart = ' -';
            } else {
                $classPart = '';
            }
            $titlePart = $title ? ' ' . $title : '';

            $openingTag = '<link ' . $href . $targetPart . $classPart . $titlePart . '>';
            $closingTag = '</link>';
        }

        $segments[$index] = $openingTag . $this->TS_links_db($this->removeFirstAndLastTag($segments[$index])) . $closingTag;
    }
    return implode('', $segments);
}
00651 
/**
 * Transformation handler: 'ts_links' / direction: "rte"
 * Converting <link tags> to <A>-tags
 *
 * The first parameter of each <link> tag is resolved to an href roughly like
 * tslib_content->typolink() does it: mail address, anchor, file in the site root,
 * external URL, internal file, or page id/alias. The remaining parameters become
 * the target, class and title attributes. External URLs are flagged with
 * external="1"; unresolvable page references are flagged with an "rteerror"
 * attribute and a warning style. The content inside each <link>-block is
 * processed recursively.
 *
 * @param   string      Content input
 * @return  string      Content output
 * @see TS_links_db()
 */
function TS_links_rte($value) {
    $value = $this->TS_AtagToAbs($value);

        // Split content by the TYPO3 pseudo tag "<link>":
    $blockSplit = $this->splitIntoBlock('link', $value, 1);
    $siteUrl = $this->siteUrl();
    foreach ($blockSplit as $k => $v) {
            // Only odd positions contain a <link>-block
        if (!($k % 2)) {
            continue;
        }

            // Per-link state. All of it is reset for every block; otherwise a mail or anchor link
            // following an external link would inherit external="1" from the previous iteration.
        $error = '';
        $external = FALSE;
        $href = '';

        $tagCode = t3lib_div::unQuoteFilenames(trim(substr($this->getFirstTag($v), 0, -1)), true);
        $link_param = isset($tagCode[1]) ? $tagCode[1] : '';

            // Parsing the typolink data. This parsing is roughly done like in tslib_content->typolink()
        if (strstr($link_param, '@')) { // mailadr
            $href = 'mailto:' . preg_replace('/^mailto:/i', '', $link_param);
        } elseif (substr($link_param, 0, 1) == '#') { // check if anchor
            $href = $siteUrl . $link_param;
        } else {
            $fileChar = intval(strpos($link_param, '/'));
            $urlChar = intval(strpos($link_param, '.'));
                // Parse URL:
            $pU = parse_url($link_param);
            $hasScheme = is_array($pU) && !empty($pU['scheme']);
                // Detects if a file is found in site-root OR is a simulateStaticDocument.
            list($rootFileDat) = explode('?', $link_param);
            $rFD_fI = pathinfo($rootFileDat);
            $rootFileExtension = isset($rFD_fI['extension']) ? strtolower($rFD_fI['extension']) : '';
            if (trim($rootFileDat) && !strstr($link_param, '/') && (@is_file(PATH_site . $rootFileDat) || t3lib_div::inList('php,html,htm', $rootFileExtension))) {
                $href = $siteUrl . $link_param;
            } elseif ($hasScheme || ($urlChar && (!$fileChar || $urlChar < $fileChar))) {
                    // url (external): if has scheme or if a '.' comes before a '/'.
                $href = $link_param;
                if (!$hasScheme) {
                    $href = 'http://' . $href;
                }
                $external = TRUE;
            } elseif ($fileChar) { // file (internal)
                $href = $siteUrl . $link_param;
            } else { // integer or alias (alias is without slashes or periods or commas, that is 'nospace,alphanum_x,lower,unique' according to tables.php!!)
                    // Splitting the parameter by ',' and if the array counts more than 1 element it's a id/type/parameters triplet
                $pairParts = t3lib_div::trimExplode(',', $link_param, TRUE);
                $idPart = isset($pairParts[0]) ? $pairParts[0] : '';
                $link_params_parts = explode('#', $idPart);
                $idPart = trim($link_params_parts[0]);
                $sectionMark = isset($link_params_parts[1]) ? trim($link_params_parts[1]) : '';
                    // If no id or alias is given, set it to class record pid
                if (!strcmp($idPart, '')) {
                    $idPart = $this->recPid;
                }
                    // Checking if the id-parameter is an alias.
                if (!t3lib_div::testInt($idPart)) {
                    list($idPartR) = t3lib_BEfunc::getRecordsByField('pages', 'alias', $idPart);
                    $idPart = intval($idPartR['uid']);
                }
                $page = t3lib_BEfunc::getRecord('pages', $idPart);
                    // Keyword in front of the first ':' (e.g. "record"). Taken with list() because
                    // array_shift() needs a real variable for its by-reference parameter.
                list($linkHandlerKeyword) = explode(':', $link_param);
                if (is_array($page)) { // Page must exist...
                    $href = $siteUrl . '?id=' . $idPart . (!empty($pairParts[2]) ? $pairParts[2] : '') . ($sectionMark ? '#' . $sectionMark : '');
                        // linkHandler - allowing links to start with registered linkHandler e.g. "record:"
                } elseif (isset($GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['tslib/class.tslib_content.php']['typolinkLinkHandler'][$linkHandlerKeyword])) {
                    $href = $link_param;
                } else {
                    $href = $siteUrl . '?id=' . $link_param;
                    $error = 'No page found: ' . $idPart;
                }
            }
        }

            // Setting the A-tag ("-" in the target or class position means "not set"):
        $bTag = '<a href="' . htmlspecialchars($href) . '"' .
                (!empty($tagCode[2]) && $tagCode[2] != '-' ? ' target="' . htmlspecialchars($tagCode[2]) . '"' : '') .
                (!empty($tagCode[3]) && $tagCode[3] != '-' ? ' class="' . htmlspecialchars($tagCode[3]) . '"' : '') .
                (!empty($tagCode[4]) ? ' title="' . htmlspecialchars($tagCode[4]) . '"' : '') .
                ($external ? ' external="1"' : '') .
                ($error ? ' rteerror="' . htmlspecialchars($error) . '" style="background-color: yellow; border:2px red solid; color: black;"' : '') . // Should be OK to add the style; the transformation back to database will remove it...
                '>';
        $eTag = '</a>';
        $blockSplit[$k] = $bTag . $this->TS_links_rte($this->removeFirstAndLastTag($blockSplit[$k])) . $eTag;
    }

        // Return content:
    return implode('', $blockSplit);
}
00742 
/**
 * Preserve special tags / direction: "db"
 * Turns <span specialtag="..."> wrappers back into the tag stored (rawurlencoded)
 * in their "specialtag" attribute. Does nothing unless $this->preserveTags is set.
 *
 * @param   string      Content input
 * @return  string      Content output
 * @see TS_preserve_rte()
 */
function TS_preserve_db($value) {
    if ($this->preserveTags) {
            // span-tags are used as carriers for the special tags
        $segments = $this->splitIntoBlock('span', $value);
        foreach ($segments as $index => $segment) {
            if ($index % 2) { // span block
                $attributes = $this->get_tag_attributes_classic($this->getFirstTag($segment));
                if ($attributes['specialtag']) {
                    $restoredTag = rawurldecode($attributes['specialtag']);
                    $restoredTagName = $this->getFirstTagName($restoredTag);
                    $segments[$index] = $restoredTag . $this->removeFirstAndLastTag($segment) . '</' . $restoredTagName . '>';
                }
            }
        }
        $value = implode('', $segments);
    }
    return $value;
}
00768 
/**
 * Preserve special tags / direction: "rte"
 * Wraps the content of every tag listed in $this->preserveTags in a
 * <span specialtag="..."> whose attribute holds the rawurlencoded original opening tag.
 * Does nothing unless $this->preserveTags is set.
 *
 * @param   string      Content input
 * @return  string      Content output
 * @see TS_preserve_db()
 */
function TS_preserve_rte($value) {
    if ($this->preserveTags) {
        $segments = $this->splitIntoBlock($this->preserveTags, $value);
        foreach ($segments as $index => $segment) {
            if ($index % 2) { // block of one of the preserved tags
                $encodedTag = rawurlencode($this->getFirstTag($segment));
                $segments[$index] = '<span specialtag="' . $encodedTag . '">' . $this->removeFirstAndLastTag($segment) . '</span>';
            }
        }
        $value = implode('', $segments);
    }
    return $value;
}
00788 
/**
 * Transformation handler: 'ts_transform' + 'css_transform' / direction: "db"
 * Cleaning (->db) for standard content elements (ts)
 *
 * The content is split by block elements (TABLE, BLOCKQUOTE, optionally DIV and
 * everything in $this->blockElementList). Blocks are handled per tag name, see the
 * switch below; content between blocks is passed through divideIntoLines().
 * Every processed part except the last one is terminated with a linebreak.
 *
 * Recursion depth is limited by $this->TS_transform_db_safecounter: when it is
 * exhausted the content is returned unprocessed.
 *
 * @param   string      Content input
 * @param   boolean     If true, the transformation was "css_transform", otherwise "ts_transform"
 * @return  string      Content output
 * @see TS_transform_rte()
 */
function TS_transform_db($value, $css = FALSE) {

        // safety... so forever loops are avoided (they should not occur, but an error would potentially do this...)
    $this->TS_transform_db_safecounter--;
    if ($this->TS_transform_db_safecounter < 0) {
            // Hand back the level claimed above. Without this every overflow would lower the
            // allowed depth permanently, and sibling blocks at a legal depth would be skipped.
        $this->TS_transform_db_safecounter++;
        return $value;
    }

        // Matches a run of true linebreaks; used by most branches below
    $lineBreakRuns = '/[' . preg_quote(LF . CR) . ']+/';

        // Split the content from RTE by the occurence of these blocks:
    $blockSplit = $this->splitIntoBlock('TABLE,BLOCKQUOTE,' . ($this->procOptions['preserveDIVSections'] ? 'DIV,' : '') . $this->blockElementList, $value);

    $cc = 0;
    $aC = count($blockSplit);

        // Avoid superfluous linebreaks by transform_db after ending headListTag
    while ($aC && !strcmp(trim($blockSplit[$aC - 1]), '')) {
        unset($blockSplit[$aC - 1]);
        $aC = count($blockSplit);
    }

        // Traverse the blocks
    foreach ($blockSplit as $k => $v) {
        $cc++;
            // No linebreak after the very last part
        $lastBR = $cc == $aC ? '' : LF;

        if ($k % 2) { // Inside block:

                // Init:
            $tag = $this->getFirstTag($v);
            $tagName = strtolower($this->getFirstTagName($v));

                // Process based on the tag:
            switch ($tagName) {
                case 'blockquote': // Keep blockquotes, but clean the inside recursively in the same manner as the main code
                case 'dd' : // Do the same on dd elements
                case 'div': // Do the same on div sections, if they were splitted
                    $blockSplit[$k] = $tag . $this->TS_transform_db($this->removeFirstAndLastTag($blockSplit[$k]), $css) . '</' . $tagName . '>' . $lastBR;
                break;
                case 'ol':
                case 'ul': // Transform lists into <typolist>-tags:
                    if (!$css) {
                        if (!isset($this->procOptions['typolist']) || $this->procOptions['typolist']) {
                            $parts = $this->getAllParts($this->splitIntoBlock('LI', $this->removeFirstAndLastTag($blockSplit[$k])), 1, 0);
                                // Iterate over the keys only, so the $value parameter is not overwritten
                            foreach (array_keys($parts) as $k2) {
                                $parts[$k2] = preg_replace($lineBreakRuns, '', $parts[$k2]); // remove all linesbreaks!
                                $parts[$k2] = $this->defaultTStagMapping($parts[$k2], 'db');
                                $parts[$k2] = $this->cleanFontTags($parts[$k2], 0, 0, 0);
                                $parts[$k2] = $this->HTMLcleaner_db($parts[$k2], strtolower($this->procOptions['allowTagsInTypolists'] ? $this->procOptions['allowTagsInTypolists'] : 'br,font,b,i,u,a,img,span,strong,em'));
                            }
                            if ($tagName == 'ol') {
                                $params = ' type="1"';
                            } else {
                                $params = '';
                            }
                            $blockSplit[$k] = '<typolist' . $params . '>' . LF . implode(LF, $parts) . LF . '</typolist>' . $lastBR;
                        }
                    } else {
                        $blockSplit[$k] = preg_replace($lineBreakRuns, ' ', $this->transformStyledATags($blockSplit[$k])) . $lastBR;
                    }
                break;
                case 'table': // Tables are NOT allowed in any form (unless preserveTables is set or CSS is the mode)
                    if (!$this->procOptions['preserveTables'] && !$css) {
                        $blockSplit[$k] = $this->TS_transform_db($this->removeTables($blockSplit[$k]));
                    } else {
                        $blockSplit[$k] = preg_replace($lineBreakRuns, ' ', $this->transformStyledATags($blockSplit[$k])) . $lastBR;
                    }
                break;
                case 'h1':
                case 'h2':
                case 'h3':
                case 'h4':
                case 'h5':
                case 'h6':
                    if (!$css) {
                        $attribArray = $this->get_tag_attributes_classic($tag);
                            // Processing inner content here:
                        $innerContent = $this->HTMLcleaner_db($this->removeFirstAndLastTag($blockSplit[$k]));

                        if (!isset($this->procOptions['typohead']) || $this->procOptions['typohead']) {
                                // Header level is the digit of the tag name; level 6 is written without a type attribute
                            $type = intval(substr($tagName, 1));
                            $blockSplit[$k] = '<typohead' .
                                              ($type != 6 ? ' type="' . $type . '"' : '') .
                                              ($attribArray['align'] ? ' align="' . $attribArray['align'] . '"' : '') .
                                              ($attribArray['class'] ? ' class="' . $attribArray['class'] . '"' : '') .
                                              '>' .
                                              $innerContent .
                                              '</typohead>' .
                                              $lastBR;
                        } else {
                            $blockSplit[$k] = '<' . $tagName .
                                              ($attribArray['align'] ? ' align="' . htmlspecialchars($attribArray['align']) . '"' : '') .
                                              ($attribArray['class'] ? ' class="' . htmlspecialchars($attribArray['class']) . '"' : '') .
                                              '>' .
                                              $innerContent .
                                              '</' . $tagName . '>' .
                                              $lastBR;
                        }
                    } else {
                            // Eliminate true linebreaks inside Hx tags
                        $blockSplit[$k] = preg_replace($lineBreakRuns, ' ', $this->transformStyledATags($blockSplit[$k])) . $lastBR;
                    }
                break;
                default:
                        // Eliminate true linebreaks inside other headlist tags
                    $blockSplit[$k] = preg_replace($lineBreakRuns, ' ', $this->transformStyledATags($blockSplit[$k])) . $lastBR;
                break;
            }
        } else { // NON-block:
            if (strcmp(trim($blockSplit[$k]), '')) {
                    // Remove linebreaks following hr tags
                $blockSplit[$k] = preg_replace('/<(hr)(\s[^>\/]*)?[[:space:]]*\/?>[' . preg_quote(LF . CR) . ']+/', '<$1$2 />', $blockSplit[$k]);
                    // Replace other linebreaks with space
                $blockSplit[$k] = preg_replace($lineBreakRuns, ' ', $blockSplit[$k]);
                $blockSplit[$k] = $this->divideIntoLines($blockSplit[$k]) . $lastBR;
                $blockSplit[$k] = $this->transformStyledATags($blockSplit[$k]);
            } else {
                    // Whitespace-only content between blocks is dropped
                unset($blockSplit[$k]);
            }
        }
    }
    $this->TS_transform_db_safecounter++;

    return implode('', $blockSplit);
}
00922 
/**
 * Wraps a-tags that contain a style attribute with a span-tag.
 * The style attribute is moved from the a-tag to the wrapping span-tag;
 * a-tags without a style attribute are left untouched.
 *
 * @param   string      Content input
 * @return  string      Content output
 */
function transformStyledATags($value) {
    $segments = $this->splitIntoBlock('A', $value);
    foreach ($segments as $index => $segment) {
            // Only odd positions contain an <A>-block
        if (!($index % 2)) {
            continue;
        }
        $linkAttributes = $this->get_tag_attributes_classic($this->getFirstTag($segment), 1);
        if (!$linkAttributes['style']) {
            continue;
        }
            // Move the style attribute over to the span
        $spanAttributes = array('style' => $linkAttributes['style']);
        unset($linkAttributes['style']);
        $opening = '<span ' . t3lib_div::implodeAttributes($spanAttributes, 1) . '><a ' . t3lib_div::implodeAttributes($linkAttributes, 1) . '>';
        $segments[$index] = $opening . $this->removeFirstAndLastTag($segments[$index]) . '</a></span>';
    }
    return implode('', $segments);
}
00945 
/**
 * Transformation handler: 'ts_transform' + 'css_transform' / direction: "rte"
 * Set (->rte) for standard content elements (ts)
 *
 * The content is split by block elements. blockquote/dd/div blocks are processed
 * recursively, <typolist> becomes an OL/UL list and <typohead> becomes an Hx tag
 * (both only if the matching procOption is not switched off). Content between
 * blocks is passed to setDivTags(). The parts are joined with linebreaks.
 *
 * @param   string      Content input
 * @param   boolean     If true, the transformation was "css_transform", otherwise "ts_transform"
 * @return  string      Content output
 * @see TS_transform_db()
 */
function TS_transform_rte($value, $css = 0) {
        // Tags that delimit a block, and the patterns for a linebreak at the start/end of a part
    $blockTagList = 'TABLE,BLOCKQUOTE,TYPOLIST,TYPOHEAD,' . ($this->procOptions['preserveDIVSections'] ? 'DIV,' : '') . $this->blockElementList;
    $leadingLineBreak = '/^[ ]*' . LF . '/';
    $trailingLineBreak = '/' . LF . '[ ]*$/';

        // Split the content from Database by the occurence of these blocks:
    $segments = $this->splitIntoBlock($blockTagList, $value);

    foreach ($segments as $index => $segment) {
        if ($index % 2) { // Inside one of the blocks:
            $openingTag = $this->getFirstTag($segment);
            $tagName = strtolower($this->getFirstTagName($segment));
            $attributes = $this->get_tag_attributes_classic($openingTag);

            if ($tagName == 'blockquote' || $tagName == 'dd' || $tagName == 'div') {
                    // Keep the wrapping tag, transform what is inside it
                $inner = $this->TS_transform_rte($this->removeFirstAndLastTag($segments[$index]), $css);
                $segments[$index] = $openingTag . $inner . '</' . $tagName . '>';
            } elseif ($tagName == 'typolist') {
                    // Transform typolist blocks into OL/UL lists. Type 1 is expected to be numerical block
                if (!isset($this->procOptions['typolist']) || $this->procOptions['typolist']) {
                    $listContent = $this->removeFirstAndLastTag($segments[$index]);
                    $listContent = preg_replace($leadingLineBreak, '', $listContent);
                    $listContent = preg_replace($trailingLineBreak, '', $listContent);
                        // One list item per line
                    $items = explode(LF, $listContent);
                    $listTag = $attributes['type'] == 1 ? 'ol' : 'ul';
                    $segments[$index] = '<' . $listTag . '>' . LF .
                                        '<li>' . implode('</li>' . LF . '<li>', $items) . '</li>' .
                                        '</' . $listTag . '>';
                }
            } elseif ($tagName == 'typohead') {
                    // Transform typohead into Hx tags.
                if (!isset($this->procOptions['typohead']) || $this->procOptions['typohead']) {
                    $headerContent = $this->removeFirstAndLastTag($segments[$index]);
                    $level = t3lib_div::intInRange($attributes['type'], 0, 6);
                    if (!$level) {
                            // No type means level 6
                        $level = 6;
                    }
                    $alignAttribute = $attributes['align'] ? ' align="' . $attributes['align'] . '"' : '';
                    $classAttribute = $attributes['class'] ? ' class="' . $attributes['class'] . '"' : '';
                    $segments[$index] = '<h' . $level . $alignAttribute . $classAttribute . '>' .
                                        $headerContent .
                                        '</h' . $level . '>';
                }
            }

                // Removing the linebreak that follows a block
            $segments[$index + 1] = preg_replace($leadingLineBreak, '', $segments[$index + 1]);
        } else { // NON-block:
                // Read from the array, not from the loop copy: the preceding block may have changed this part
            $followingTagName = $this->getFirstTagName($segments[$index + 1]);
            $isSingleLineBreak = $segments[$index] == LF;
            if (t3lib_div::inList($blockTagList, $followingTagName)) {
                    // Removing the linebreak in front of a block
                $segments[$index] = preg_replace($trailingLineBreak, '', $segments[$index]);
            }
                // A blank part is dropped, UNLESS it happened to be a single line break.
            if (!strcmp($segments[$index], '') && !$isSingleLineBreak) {
                unset($segments[$index]);
            } else {
                $segments[$index] = $this->setDivTags($segments[$index], ($this->procOptions['useDIVasParagraphTagForRTE'] ? 'div' : 'p'));
            }
        }
    }
    return implode(LF, $segments);
}
01022 
/**
 * Transformation handler: 'ts_strip' / direction: "db"
 * Removes every tag that is not in the fixed list of allowed tags below.
 * Only the tags are removed; the text they enclose stays in place.
 *
 * @param   string      Content input
 * @return  string      Content output
 */
function TS_strip_db($value) {
    $allowedTagNames = explode(',', 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote');

        // strip_tags() expects the allowed tags in the form "<b><i>..."
    $allowedTags = '';
    foreach ($allowedTagNames as $allowedTagName) {
        $allowedTags .= '<' . $allowedTagName . '>';
    }

    return strip_tags($value, $allowedTags);
}
01034 
01035 
01036     /***************************************************************
01037      *
01038      * Generic RTE transformation, analysis and helper functions
01039      *
01040      **************************************************************/
01041 
/**
 * Reads the file or url $url and returns the content.
 * Thin wrapper that passes the call on to t3lib_div::getURL().
 *
 * @param   string      Filepath/URL to read
 * @return  string      The content from the resource given as input.
 * @see t3lib_div::getURL()
 */
function getURL($url) {
    $content = t3lib_div::getURL($url);
    return $content;
}
01052 
/**
 * Function for cleaning content going into the database.
 * Content is cleaned eg. by removing unallowed HTML and de-HSC content
 * It calls HTMLcleaner() with a configuration derived from $this->procOptions and getKeepTags('db').
 *
 * @param   string      Content to clean up
 * @param   string      Comma list of tags to specifically allow. Default comes from getKeepTags and is ""
 * @return  string      Clean content
 * @see getKeepTags()
 */
function HTMLcleaner_db($content, $tagList = '') {
    $keepTags = $tagList ? $this->getKeepTags('db', $tagList) : $this->getKeepTags('db');

        // Default: remove unknown tags.
    $keepUnknownTags = $this->procOptions['dontRemoveUnknownTags_db'] ? 1 : 0;
        // Default: re-convert literals to characters (that is &lt; to <)
    $hscMode = $this->procOptions['dontUndoHSC_db'] ? 0 : -1;

        // Create additional configuration in order to honor the setting RTE.default.proc.HTMLparser_db.xhtml_cleaning=1
        // (the same setting on the entry/exit parser configuration counts as well)
    $addConfig = array();
    foreach (array('HTMLparser_db.', 'entryHTMLparser_db.', 'exitHTMLparser_db.') as $parserKey) {
        if (is_array($this->procOptions[$parserKey]) && $this->procOptions[$parserKey]['xhtml_cleaning']) {
            $addConfig['xhtml'] = 1;
            break;
        }
    }

    return $this->HTMLcleaner($content, $keepTags, $keepUnknownTags, $hscMode, $addConfig);
}
01080 
01081     /**
01082      * Creates an array of configuration for the HTMLcleaner function based on whether content go TO or FROM the Rich Text Editor ($direction)
01083      * Unless "tagList" is given, the function will cache the configuration for next time processing goes on. (In this class that is the case only if we are processing a bulletlist)
01084      *
01085      * @param   string      The direction of the content being processed by the output configuration; "db" (content going into the database FROM the rte) or "rte" (content going into the form)
01086      * @param   string      Comma list of tags to keep (overriding default which is to keep all + take notice of internal configuration)
01087      * @return  array       Configuration array
01088      * @see HTMLcleaner_db()
01089      */
01090     function getKeepTags($direction = 'rte', $tagList = '') {
01091         if (!is_array($this->getKeepTags_cache[$direction]) || $tagList) {
01092 
01093                 // Setting up allowed tags:
01094             if (strcmp($tagList, '')) { // If the $tagList input var is set, this will take precedence
01095                 $keepTags = array_flip(t3lib_div::trimExplode(',', $tagList, 1));
01096             } else { // Default is to get allowed/denied tags from internal array of processing options:
01097                     // Construct default list of tags to keep:
01098                 $typoScript_list = 'b,i,u,a,img,br,div,center,pre,font,hr,sub,sup,p,strong,em,li,ul,ol,blockquote,strike,span';
01099                 $keepTags = array_flip(t3lib_div::trimExplode(',', $typoScript_list . ',' . strtolower($this->procOptions['allowTags']), 1));
01100 
01101                     // For tags to deny, remove them from $keepTags array:
01102                 $denyTags = t3lib_div::trimExplode(',', $this->procOptions['denyTags'], 1);
01103                 foreach ($denyTags as $dKe) {
01104                     unset($keepTags[$dKe]);
01105                 }
01106             }
01107 
01108                 // Based on the direction of content, set further options:
01109             switch ($direction) {
01110 
01111                     // GOING from database to Rich Text Editor:
01112                 case 'rte':
01113 
01114                     if (!isset($this->procOptions['transformBoldAndItalicTags']) || $this->procOptions['transformBoldAndItalicTags']) {
01115                             // Transform bold/italics tags to strong/em
01116                         if (isset($keepTags['b'])) {
01117                             $keepTags['b'] = array('remap' => 'STRONG');
01118                         }
01119                         if (isset($keepTags['i'])) {
01120                             $keepTags['i'] = array('remap' => 'EM');
01121                         }
01122                     }
01123 
01124                         // Transforming keepTags array so it can be understood by the HTMLcleaner function. This basically converts the format of the array from TypoScript (having .'s) to plain multi-dimensional array.
01125                     list($keepTags) = $this->HTMLparserConfig($this->procOptions['HTMLparser_rte.'], $keepTags);
01126                 break;
01127 
01128                     // GOING from RTE to database:
01129                 case 'db':
01130 
01131                     if (!isset($this->procOptions['transformBoldAndItalicTags']) || $this->procOptions['transformBoldAndItalicTags']) {
01132                             // Transform strong/em back to bold/italics:
01133                         if (isset($keepTags['strong'])) {
01134                             $keepTags['strong'] = array('remap' => 'b');
01135                         }
01136                         if (isset($keepTags['em'])) {
01137                             $keepTags['em'] = array('remap' => 'i');
01138                         }
01139                     }
01140 
01141                         // Setting up span tags if they are allowed:
01142                     if (isset($keepTags['span'])) {
01143                         $classes = array_merge(array(''), $this->allowedClasses);
01144                         $keepTags['span'] = array(
01145                             'allowedAttribs' => 'id,class,style,title,lang,xml:lang,dir',
01146                             'fixAttrib' => Array(
01147                                 'class' => Array(
01148                                     'list' => $classes,
01149                                     'removeIfFalse' => 1
01150                                 ),
01151                             ),
01152                             'rmTagIfNoAttrib' => 1
01153                         );
01154                         if (!$this->procOptions['allowedClasses']) {
01155                             unset($keepTags['span']['fixAttrib']['class']['list']);
01156                         }
01157                     }
01158 
01159                         // Setting up font tags if they are allowed:
01160                     if (isset($keepTags['font'])) {
01161                         $colors = array_merge(array(''), t3lib_div::trimExplode(',', $this->procOptions['allowedFontColors'], 1));
01162                         $keepTags['font'] = array(
01163                             'allowedAttribs' => 'face,color,size',
01164                             'fixAttrib' => Array(
01165                                 'face' => Array(
01166                                     'removeIfFalse' => 1
01167                                 ),
01168                                 'color' => Array(
01169                                     'removeIfFalse' => 1,
01170                                     'list' => $colors
01171                                 ),
01172                                 'size' => Array(
01173                                     'removeIfFalse' => 1,
01174                                 )
01175                             ),
01176                             'rmTagIfNoAttrib' => 1
01177                         );
01178                         if (!$this->procOptions['allowedFontColors']) {
01179                             unset($keepTags['font']['fixAttrib']['color']['list']);
01180                         }
01181                     }
01182 
01183                         // Setting further options, getting them from the processiong options:
01184                     $TSc = $this->procOptions['HTMLparser_db.'];
01185                     if (!$TSc['globalNesting']) {
01186                         $TSc['globalNesting'] = 'b,i,u,a,center,font,sub,sup,strong,em,strike,span';
01187                     }
01188                     if (!$TSc['noAttrib']) {
01189                         $TSc['noAttrib'] = 'b,i,u,br,center,hr,sub,sup,strong,em,li,ul,ol,blockquote,strike';
01190                     }
01191 
01192                         // Transforming the array from TypoScript to regular array:
01193                     list($keepTags) = $this->HTMLparserConfig($TSc, $keepTags);
01194                 break;
01195             }
01196 
01197                 // Caching (internally, in object memory) the result unless tagList is set:
01198             if (!$tagList) {
01199                 $this->getKeepTags_cache[$direction] = $keepTags;
01200             } else {
01201                 return $keepTags;
01202             }
01203         }
01204 
01205             // Return result:
01206         return $this->getKeepTags_cache[$direction];
01207     }
01208 
01209     /**
01210      * This resolves the $value into parts based on <div></div>-sections and <p>-sections and <br />-tags. These are returned as lines separated by LF.
01211      * This point is to resolve the HTML-code returned from RTE into ordinary lines so it's 'human-readable'
01212      * The function ->setDivTags does the opposite.
01213      * This function processes content to go into the database.
01214      *
01215      * @param   string      Value to process.
01216      * @param   integer     Recursion brake. Decremented on each recursion down to zero. Default is 5 (which equals the allowed nesting levels of p/div tags).
01217      * @param   boolean     If true, an array with the lines is returned, otherwise a string of the processed input value.
01218      * @return  string      Processed input value.
01219      * @see setDivTags()
01220      */
01221     function divideIntoLines($value, $count = 5, $returnArray = FALSE) {
01222 
01223             // Internalize font tags (move them from OUTSIDE p/div to inside it that is the case):
01224         if ($this->procOptions['internalizeFontTags']) {
01225             $value = $this->internalizeFontTags($value);
01226         }
01227 
01228             // Setting configuration for processing:
01229         $allowTagsOutside = t3lib_div::trimExplode(',', strtolower($this->procOptions['allowTagsOutside'] ? 'hr,' . $this->procOptions['allowTagsOutside'] : 'hr,img'), 1);
01230         $remapParagraphTag = strtoupper($this->procOptions['remapParagraphTag']);
01231         $divSplit = $this->splitIntoBlock('div,p', $value, 1); // Setting the third param to 1 will eliminate false end-tags. Maybe this is a good thing to do...?
01232 
01233         if ($this->procOptions['keepPDIVattribs']) {
01234             $keepAttribListArr = t3lib_div::trimExplode(',', strtolower($this->procOptions['keepPDIVattribs']), 1);
01235         } else {
01236             $keepAttribListArr = array();
01237         }
01238 
01239             // Returns plainly the value if there was no div/p sections in it
01240         if (count($divSplit) <= 1 || $count <= 0) {
01241                 // Wrap hr tags with LF's
01242             $newValue = preg_replace('/<(hr)(\s[^>\/]*)?[[:space:]]*\/?>/i', LF . '<$1$2 />' . LF, $value);
01243             $newValue = preg_replace('/' . preg_quote(LF . LF) . '/i', LF, $newValue);
01244             $newValue = preg_replace('/(^' . preg_quote(LF) . ')|(' . preg_quote(LF) . '$)/i', '', $newValue);
01245             return $newValue;
01246         }
01247 
01248             // Traverse the splitted sections:
01249         foreach ($divSplit as $k => $v) {
01250             if ($k % 2) { // Inside
01251                 $v = $this->removeFirstAndLastTag($v);
01252 
01253                     // Fetching 'sub-lines' - which will explode any further p/div nesting...
01254                 $subLines = $this->divideIntoLines($v, $count - 1, 1);
01255                 if (is_array($subLines)) { // So, if there happend to be sub-nesting of p/div, this is written directly as the new content of THIS section. (This would be considered 'an error')
01256                     // No noting.
01257                 } else { //... but if NO subsection was found, we process it as a TRUE line without erronous content:
01258                     $subLines = array($subLines);
01259                     if (!$this->procOptions['dontConvBRtoParagraph']) { // process break-tags, if configured for. Simply, the breaktags will here be treated like if each was a line of content...
01260                         $subLines = preg_split('/<br[[:space:]]*[\/]?>/i', $v);
01261                     }
01262 
01263                         // Traverse sublines (there is typically one, except if <br/> has been converted to lines as well!)
01264                     foreach ($subLines as $sk => $value) {
01265 
01266                             // Clear up the subline for DB.
01267                         $subLines[$sk] = $this->HTMLcleaner_db($subLines[$sk]);
01268 
01269                             // Get first tag, attributes etc:
01270                         $fTag = $this->getFirstTag($divSplit[$k]);
01271                         $tagName = strtolower($this->getFirstTagName($divSplit[$k]));
01272                         $attribs = $this->get_tag_attributes($fTag);
01273 
01274                             // Keep attributes (lowercase)
01275                         $newAttribs = array();
01276                         if (count($keepAttribListArr)) {
01277                             foreach ($keepAttribListArr as $keepA) {
01278                                 if (isset($attribs[0][$keepA])) {
01279                                     $newAttribs[$keepA] = $attribs[0][$keepA];
01280                                 }
01281                             }
01282                         }
01283 
01284                             // ALIGN attribute:
01285                         if (!$this->procOptions['skipAlign'] && strcmp(trim($attribs[0]['align']), '') && strtolower($attribs[0]['align']) != 'left') { // Set to value, but not 'left'
01286                             $newAttribs['align'] = strtolower($attribs[0]['align']);
01287                         }
01288 
01289                             // CLASS attribute:
01290                         if (!$this->procOptions['skipClass'] && strcmp(trim($attribs[0]['class']), '')) { // Set to whatever value
01291                             if (!count($this->allowedClasses) || in_array($attribs[0]['class'], $this->allowedClasses)) {
01292                                 $newAttribs['class'] = $attribs[0]['class'];
01293                             } else {
01294                                 $classes = t3lib_div::trimExplode(' ', $attribs[0]['class'], true);
01295                                 $newClasses = array();
01296                                 foreach ($classes as $class) {
01297                                     if (in_array($class, $this->allowedClasses)) {
01298                                         $newClasses[] = $class;
01299                                     }
01300                                 }
01301                                 if (count($newClasses)) {
01302                                     $newAttribs['class'] = implode(' ', $newClasses);
01303                                 }
01304                             }
01305                         }
01306 
01307                             // Remove any line break char (10 or 13)
01308                         $subLines[$sk] = preg_replace('/' . LF . '|' . CR . '/', '', $subLines[$sk]);
01309 
01310                             // If there are any attributes or if we are supposed to remap the tag, then do so:
01311                         if (count($newAttribs) && strcmp($remapParagraphTag, '1')) {
01312                             if ($remapParagraphTag == 'P') {
01313                                 $tagName = 'p';
01314                             }
01315                             if ($remapParagraphTag == 'DIV') {
01316                                 $tagName = 'div';
01317                             }
01318                             $subLines[$sk] = '<' . trim($tagName . ' ' . $this->compileTagAttribs($newAttribs)) . '>' . $subLines[$sk] . '</' . $tagName . '>';
01319                         }
01320                     }
01321                 }
01322                     // Add the processed line(s)
01323                 $divSplit[$k] = implode(LF, $subLines);
01324 
01325                     // If it turns out the line is just blank (containing a &nbsp; possibly) then just make it pure blank.
01326                     // But, prevent filtering of lines that are blank in sense above, but whose tags contain attributes.
01327                     // Those attributes should have been filtered before; if they are still there they must be considered as possible content.
01328                 if (trim(strip_tags($divSplit[$k])) == '&nbsp;' && !preg_match('/<(img)(\s[^>]*)?\/?>/si', $divSplit[$k]) && !preg_match('/<([^>]*)?( align| class| style| id| title| dir| lang| xml:lang)([^>]*)?>/si', trim($divSplit[$k]))) {
01329                     $divSplit[$k] = '';
01330                 }
01331             } else { // outside div:
01332                     // Remove positions which are outside div/p tags and without content
01333                 $divSplit[$k] = trim(strip_tags($divSplit[$k], '<' . implode('><', $allowTagsOutside) . '>'));
01334                     // Wrap hr tags with LF's
01335                 $divSplit[$k] = preg_replace('/<(hr)(\s[^>\/]*)?[[:space:]]*\/?>/i', LF . '<$1$2 />' . LF, $divSplit[$k]);
01336                 $divSplit[$k] = preg_replace('/' . preg_quote(LF . LF) . '/i', LF, $divSplit[$k]);
01337                 $divSplit[$k] = preg_replace('/(^' . preg_quote(LF) . ')|(' . preg_quote(LF) . '$)/i', '', $divSplit[$k]);
01338                 if (!strcmp($divSplit[$k], '')) {
01339                     unset($divSplit[$k]);
01340                 } // Remove part if it's empty
01341             }
01342         }
01343 
01344             // Return value:
01345         return $returnArray ? $divSplit : implode(LF, $divSplit);
01346     }
01347 
01348     /**
01349      * Converts all lines into <div></div>/<p></p>-sections (unless the line is a div-section already)
01350      * For processing of content going FROM database TO RTE.
01351      *
01352      * @param   string      Value to convert
01353      * @param   string      Tag to wrap with. Either "p" or "div" should it be. Lowercase preferably.
01354      * @return  string      Processed value.
01355      * @see divideIntoLines()
01356      */
01357     function setDivTags($value, $dT = 'p') {
01358 
01359             // First, setting configuration for the HTMLcleaner function. This will process each line between the <div>/<p> section on their way to the RTE
01360         $keepTags = $this->getKeepTags('rte');
01361         $kUknown = $this->procOptions['dontProtectUnknownTags_rte'] ? 0 : 'protect'; // Default: remove unknown tags.
01362         $hSC = $this->procOptions['dontHSC_rte'] ? 0 : 1; // Default: re-convert literals to characters (that is &lt; to <)
01363         $convNBSP = !$this->procOptions['dontConvAmpInNBSP_rte'] ? 1 : 0;
01364 
01365             // Divide the content into lines, based on LF:
01366         $parts = explode(LF, $value);
01367         foreach ($parts as $k => $v) {
01368 
01369                 // Processing of line content:
01370             if (!strcmp(trim($parts[$k]), '')) { // If the line is blank, set it to &nbsp;
01371                 $parts[$k] = '&nbsp;';
01372             } else { // Clean the line content:
01373                 $parts[$k] = $this->HTMLcleaner($parts[$k], $keepTags, $kUknown, $hSC);
01374                 if ($convNBSP) {
01375                     $parts[$k] = str_replace('&amp;nbsp;', '&nbsp;', $parts[$k]);
01376                 }
01377             }
01378 
01379                 // Wrapping the line in <$dT> if not already wrapped and does not contain an hr tag
01380             if (!preg_match('/<(hr)(\s[^>\/]*)?[[:space:]]*\/?>/i', $parts[$k])) {
01381                 $testStr = strtolower(trim($parts[$k]));
01382                 if (substr($testStr, 0, 4) != '<div' || substr($testStr, -6) != '</div>') {
01383                     if (substr($testStr, 0, 2) != '<p' || substr($testStr, -4) != '</p>') {
01384                         // Only set p-tags if there is not already div or p tags:
01385                         $parts[$k] = '<' . $dT . '>' . $parts[$k] . '</' . $dT . '>';
01386                     }
01387                 }
01388             }
01389         }
01390 
01391             // Implode result:
01392         return implode(LF, $parts);
01393     }
01394 
01395     /**
01396      * This splits the $value in font-tag chunks.
01397      * If there are any <P>/<DIV> sections inside of them, the font-tag is wrapped AROUND the content INSIDE of the P/DIV sections and the outer font-tag is removed.
01398      * This functions seems to be a good choice for pre-processing content if it has been pasted into the RTE from eg. star-office.
01399      * In that case the font-tags are normally on the OUTSIDE of the sections.
01400      * This function is used by eg. divideIntoLines() if the procesing option 'internalizeFontTags' is set.
01401      *
01402      * @param   string      Input content
01403      * @return  string      Output content
01404      * @see divideIntoLines()
01405      */
01406     function internalizeFontTags($value) {
01407 
01408             // Splitting into font tag blocks:
01409         $fontSplit = $this->splitIntoBlock('font', $value);
01410 
01411         foreach ($fontSplit as $k => $v) {
01412             if ($k % 2) { // Inside
01413                 $fTag = $this->getFirstTag($v); // Fint font-tag
01414 
01415                 $divSplit_sub = $this->splitIntoBlock('div,p', $this->removeFirstAndLastTag($v), 1);
01416                 if (count($divSplit_sub) > 1) { // If there were div/p sections inside the font-tag, do something about it...
01417                         // traverse those sections:
01418                     foreach ($divSplit_sub as $k2 => $v2) {
01419                         if ($k2 % 2) { // Inside
01420                             $div_p = $this->getFirstTag($v2); // Fint font-tag
01421                             $div_p_tagname = $this->getFirstTagName($v2); // Fint font-tag
01422                             $v2 = $this->removeFirstAndLastTag($v2); // ... and remove it from original.
01423                             $divSplit_sub[$k2] = $div_p . $fTag . $v2 . '</font>' . '</' . $div_p_tagname . '>';
01424                         } elseif (trim(strip_tags($v2))) {
01425                             $divSplit_sub[$k2] = $fTag . $v2 . '</font>';
01426                         }
01427                     }
01428                     $fontSplit[$k] = implode('', $divSplit_sub);
01429                 }
01430             }
01431         }
01432 
01433         return implode('', $fontSplit);
01434     }
01435 
01436     /**
01437      * Returns SiteURL based on thisScript.
01438      *
01439      * @return  string      Value of t3lib_div::getIndpEnv('TYPO3_SITE_URL');
01440      * @see t3lib_div::getIndpEnv()
01441      */
01442     function siteUrl() {
01443         return t3lib_div::getIndpEnv('TYPO3_SITE_URL');
01444     }
01445 
01446     /**
01447      * Return the storage folder of RTE image files.
01448      * Default is $GLOBALS['TYPO3_CONF_VARS']['BE']['RTE_imageStorageDir'] unless something else is configured in the types configuration for the RTE.
01449      *
01450      * @return  string
01451      */
01452     function rteImageStorageDir() {
01453         return $this->rte_p['imgpath'] ? $this->rte_p['imgpath'] : $GLOBALS['TYPO3_CONF_VARS']['BE']['RTE_imageStorageDir'];
01454     }
01455 
01456     /**
01457      * Remove all tables from incoming code
01458      * The function is trying to to this is some more or less respectfull way. The approach is to resolve each table cells content and implode it all by <br /> chars. Thus at least the content is preserved in some way.
01459      *
01460      * @param   string      Input value
01461      * @param   string      Break character to use for linebreaks.
01462      * @return  string      Output value
01463      */
01464     function removeTables($value, $breakChar = '<br />') {
01465 
01466             // Splitting value into table blocks:
01467         $tableSplit = $this->splitIntoBlock('table', $value);
01468 
01469             // Traverse blocks of tables:
01470         foreach ($tableSplit as $k => $v) {
01471             if ($k % 2) {
01472                 $tableSplit[$k] = '';
01473                 $rowSplit = $this->splitIntoBlock('tr', $v);
01474                 foreach ($rowSplit as $k2 => $v2) {
01475                     if ($k2 % 2) {
01476                         $cellSplit = $this->getAllParts($this->splitIntoBlock('td', $v2), 1, 0);
01477                         foreach ($cellSplit as $k3 => $v3) {
01478                             $tableSplit[$k] .= $v3 . $breakChar;
01479                         }
01480                     }
01481                 }
01482             }
01483         }
01484 
01485             // Implode it all again:
01486         return implode($breakChar, $tableSplit);
01487     }
01488 
01489     /**
01490      * Default tag mapping for TS
01491      *
01492      * @param   string      Input code to process
01493      * @param   string      Direction To databsae (db) or from database to RTE (rte)
01494      * @return  string      Processed value
01495      */
01496     function defaultTStagMapping($code, $direction = 'rte') {
01497         if ($direction == 'db') {
01498             $code = $this->mapTags($code, array( // Map tags
01499                                                'strong' => 'b',
01500                                                'em' => 'i'
01501                                           ));
01502         }
01503         if ($direction == 'rte') {
01504             $code = $this->mapTags($code, array( // Map tags
01505                                                'b' => 'strong',
01506                                                'i' => 'em'
01507                                           ));
01508         }
01509         return $code;
01510     }
01511 
01512     /**
01513      * Finds width and height from attrib-array
01514      * If the width and height is found in the style-attribute, use that!
01515      *
01516      * @param   array       Array of attributes from tag in which to search. More specifically the content of the key "style" is used to extract "width:xxx / height:xxx" information
01517      * @return  array       Integer w/h in key 0/1. Zero is returned if not found.
01518      */
01519     function getWHFromAttribs($attribArray) {
01520         $style = trim($attribArray['style']);
01521         if ($style) {
01522             $regex = '[[:space:]]*:[[:space:]]*([0-9]*)[[:space:]]*px';
01523                 // Width
01524             $reg = array();
01525             preg_match('/width' . $regex . '/i', $style, $reg);
01526             $w = intval($reg[1]);
01527                 // Height
01528             preg_match('/height' . $regex . '/i', $style, $reg);
01529             $h = intval($reg[1]);
01530         }
01531         if (!$w) {
01532             $w = $attribArray['width'];
01533         }
01534         if (!$h) {
01535             $h = $attribArray['height'];
01536         }
01537         return array(intval($w), intval($h));
01538     }
01539 
01540     /**
01541      * Parse <A>-tag href and return status of email,external,file or page
01542      *
01543      * @param   string      URL to analyse.
01544      * @return  array       Information in an array about the URL
01545      */
01546     function urlInfoForLinkTags($url) {
01547         $info = array();
01548         $url = trim($url);
01549         if (substr(strtolower($url), 0, 7) == 'mailto:') {
01550             $info['url'] = trim(substr($url, 7));
01551             $info['type'] = 'email';
01552         } else {
01553             $curURL = $this->siteUrl(); // 100502, removed this: 'http://'.t3lib_div::getThisUrl(); Reason: The url returned had typo3/ in the end - should be only the site's url as far as I see...
01554             for ($a = 0; $a < strlen($url); $a++) {
01555                 if ($url{$a} != $curURL{$a}) {
01556                     break;
01557                 }
01558             }
01559 
01560             $info['relScriptPath'] = substr($curURL, $a);
01561             $info['relUrl'] = substr($url, $a);
01562             $info['url'] = $url;
01563             $info['type'] = 'ext';
01564 
01565             $siteUrl_parts = parse_url($url);
01566             $curUrl_parts = parse_url($curURL);
01567 
01568             if ($siteUrl_parts['host'] == $curUrl_parts['host'] // Hosts should match
01569                 && (!$info['relScriptPath'] || (defined('TYPO3_mainDir') && substr($info['relScriptPath'], 0, strlen(TYPO3_mainDir)) == TYPO3_mainDir))) { // If the script path seems to match or is empty (FE-EDIT)
01570 
01571                     // New processing order 100502
01572                 $uP = parse_url($info['relUrl']);
01573 
01574                 if (!strcmp('#' . $siteUrl_parts['fragment'], $info['relUrl'])) {
01575                     $info['url'] = $info['relUrl'];
01576                     $info['type'] = 'anchor';
01577                 } elseif (!trim($uP['path']) || !strcmp($uP['path'], 'index.php')) {
01578                         // URL is a page (id parameter)
01579                     $pp = preg_split('/^id=/', $uP['query']);
01580                     $pp[1] = preg_replace('/&id=[^&]*/', '', $pp[1]);
01581                     $parameters = explode('&', $pp[1]);
01582                     $id = array_shift($parameters);
01583                     if ($id) {
01584                         $info['pageid'] = $id;
01585                         $info['cElement'] = $uP['fragment'];
01586                         $info['url'] = $id . ($info['cElement'] ? '#' . $info['cElement'] : '');
01587                         $info['type'] = 'page';
01588                         $info['query'] = $parameters[0] ? '&' . implode('&', $parameters) : '';
01589                     }
01590                 } else {
01591                     $info['url'] = $info['relUrl'];
01592                     $info['type'] = 'file';
01593                 }
01594             } else {
01595                 unset($info['relScriptPath']);
01596                 unset($info['relUrl']);
01597             }
01598         }
01599         return $info;
01600     }
01601 
01602     /**
01603      * Converting <A>-tags to absolute URLs (+ setting rtekeep attribute)
01604      *
01605      * @param   string      Content input
01606      * @param   boolean     If true, then the "rtekeep" attribute will not be set.
01607      * @return  string      Content output
01608      */
01609     function TS_AtagToAbs($value, $dontSetRTEKEEP = FALSE) {
01610         $blockSplit = $this->splitIntoBlock('A', $value);
01611         foreach ($blockSplit as $k => $v) {
01612             if ($k % 2) { // block:
01613                 $attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v), 1);
01614 
01615                     // Checking if there is a scheme, and if not, prepend the current url.
01616                 if (strlen($attribArray['href'])) { // ONLY do this if href has content - the <a> tag COULD be an anchor and if so, it should be preserved...
01617                     $uP = parse_url(strtolower($attribArray['href']));
01618                     if (!$uP['scheme']) {
01619                         $attribArray['href'] = $this->siteUrl() . substr($attribArray['href'], strlen($this->relBackPath));
01620                     } elseif ($uP['scheme'] != 'mailto') {
01621                         $attribArray['external'] = 1;
01622                     }
01623                 } else {
01624                     $attribArray['rtekeep'] = 1;
01625                 }
01626                 if (!$dontSetRTEKEEP) {
01627                     $attribArray['rtekeep'] = 1;
01628                 }
01629 
01630                 $bTag = '<a ' . t3lib_div::implodeAttributes($attribArray, 1) . '>';
01631                 $eTag = '</a>';
01632                 $blockSplit[$k] = $bTag . $this->TS_AtagToAbs($this->removeFirstAndLastTag($blockSplit[$k])) . $eTag;
01633             }
01634         }
01635         return implode('', $blockSplit);
01636     }
01637 }
01638 
01639 
01640 if (defined('TYPO3_MODE') && isset($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml_proc.php'])) {
01641     include_once($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml_proc.php']);
01642 }
01643 
01644 ?>