TYPO3 API  SVNRelease
class.tx_rtehtmlarea_parse_html.php
Go to the documentation of this file.
00001 <?php
00002 /***************************************************************
00003 *  Copyright notice
00004 *
00005 *  (c) 2005-2011 Stanislas Rolland <typo3(arobas)sjbr.ca>
00006 *  All rights reserved
00007 *
00008 *  This script is part of the TYPO3 project. The TYPO3 project is
00009 *  free software; you can redistribute it and/or modify
00010 *  it under the terms of the GNU General Public License as published by
00011 *  the Free Software Foundation; either version 2 of the License, or
00012 *  (at your option) any later version.
00013 *
00014 *  The GNU General Public License can be found at
00015 *  http://www.gnu.org/copyleft/gpl.html.
00016 *  A copy is found in the textfile GPL.txt and important notices to the license
00017 *  from the author is found in LICENSE.txt distributed with these scripts.
00018 *
00019 *
00020 *  This script is distributed in the hope that it will be useful,
00021 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00022 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00023 *  GNU General Public License for more details.
00024 *
00025 *  This copyright notice MUST APPEAR in all copies of the script!
00026 ***************************************************************/
00027 /**
00028  * Content parsing for htmlArea RTE
00029  *
00030  * @author  Stanislas Rolland <typo3(arobas)sjbr.ca>
00031  *
00032  * $Id: class.tx_rtehtmlarea_parse_html.php 10120 2011-01-18 20:03:36Z ohader $  *
00033  */
00034 
class tx_rtehtmlarea_parse_html {
    /**
     * Response body: filled by main() and echoed by printContent().
     *
     * @var string
     */
    public $content;

    /**
     * Module session data loaded in init(); its 'openKeys' entry is handed to main_parse_html().
     *
     * @var array
     */
    public $modData;

    /**
     * document template object
     *
     * @var template
     */
    public $doc;

    /**
     * Extension key, used as first-level key when looking up registered hooks.
     *
     * @var string
     */
    public $extKey = 'rtehtmlarea';

    /**
     * Second-level key used when looking up registered hooks.
     *
     * @var string
     */
    public $prefixId = 'TYPO3HtmlParser';

    /**
     * Creates the document template object, loads the module session data and,
     * when an 'OC_key' request parameter of the form "<flag>|<key>" is present,
     * stores the open (flag 'O') or closed state of <key> in the session data.
     *
     * @return  void
     */
    public function init() {
        global $BE_USER, $BACK_PATH, $MCONF;

        $this->doc = t3lib_div::makeInstance('template');
        $this->doc->backPath = $BACK_PATH;
        $this->doc->JScode = '';

        $this->modData = $BE_USER->getModuleData($MCONF['name'], 'ses');
        $openCloseKey = t3lib_div::_GP('OC_key');
        if ($openCloseKey) {
            $parts = explode('|', $openCloseKey);
                // A value without the "|" separator carries no key: ignore it instead of reading an undefined offset
            if (isset($parts[1])) {
                $this->modData['openKeys'][$parts[1]] = ($parts[0] == 'O') ? 1 : 0;
                $BE_USER->pushModuleData($MCONF['name'], $this->modData);
            }
        }
    }

    /**
     * Cleans the submitted content, converts the result to the charset expected
     * by the client and sends the matching Content-Type header.
     * The converted content is kept in $this->content.
     *
     * @return  void
     */
    public function main() {
        global $LANG;

        $openKeys = (is_array($this->modData) && isset($this->modData['openKeys'])) ? $this->modData['openKeys'] : null;
        $this->content .= $this->main_parse_html($openKeys);

            // if no HTTP input conversion is configured, the input was utf-8 (urlencoded).
        $fromCharSet = 'utf-8';
            // if conversion was done, the input is encoded in mbstring.internal_encoding
        if (extension_loaded('mbstring') && ini_get('mbstring.encoding_translation')) {
            $fromCharSet = strtolower(ini_get('mbstring.internal_encoding'));
        }

        $clientInfo = t3lib_div::clientInfo();
        $browser = isset($clientInfo['BROWSER']) ? $clientInfo['BROWSER'] : '';
        $system = isset($clientInfo['SYSTEM']) ? $clientInfo['SYSTEM'] : '';

            // the charset of the content element, possibly overridden by forceCharset
        $requestedCharSet = t3lib_div::_GP('charset');
        $toCharSet = $requestedCharSet ? $requestedCharSet : 'iso-8859-1';

            // These tests must be comparisons: an assignment here is always true and would force utf-8 for every client.
        if ($browser == 'msie') {
                // IE wants it back in utf-8
            $toCharSet = 'utf-8';
        } elseif ($system == 'win') {
                // if the client is windows the input may contain windows-1252 characters;
            if (strtolower($toCharSet) == 'iso-8859-1') {
                $toCharSet = 'Windows-1252';
            }
        }

            // convert to requested charset
        $this->content = $LANG->csConvObj->conv($this->content, $fromCharSet, $toCharSet);
        header('Content-Type: text/plain; charset=' . $toCharSet);
    }

    /**
     * Outputs the content prepared by main().
     *
     * @return  void
     */
    public function printContent() {
        echo $this->content;
    }

    /**
     * Rich Text Editor (RTE) html parser
     *
     * Reads the 'content' and 'RTEtsConfigParams' request parameters, runs the
     * content through the HTML cleaner when an 'enableWordClean.HTMLparser.'
     * configuration is available, then passes the result to every registered
     * 'cleanPastedContent' hook object that implements
     * cleanPastedContent_afterCleanWord().
     *
     * @param   mixed       $openKeys: open keys from the module session data; not used by this method
     * @return  string      the cleaned content
     */
    public function main_parse_html($openKeys) {
        global $BE_USER, $TYPO3_CONF_VARS;

        $html = t3lib_div::_GP('content');

            // Colon-separated parameter list; elements 0, 2, 4 and 5 are used below.
            // Padding keeps missing elements NULL without reading undefined offsets.
        $RTEtsConfigParts = array_pad(explode(':', (string) t3lib_div::_GP('RTEtsConfigParams')), 6, null);
        $RTEsetup = $BE_USER->getTSConfig('RTE', t3lib_BEfunc::getPagesTSconfig($RTEtsConfigParts[5]));
        $thisConfig = t3lib_BEfunc::RTEsetup($RTEsetup['properties'], $RTEtsConfigParts[0], $RTEtsConfigParts[2], $RTEtsConfigParts[4]);

        $HTMLParser = t3lib_div::makeInstance('t3lib_parsehtml');
            // Stays NULL (no cleaning) unless a word-clean parser configuration is found
        $HTMLparserConfig = null;
        if (isset($thisConfig['enableWordClean.']) && is_array($thisConfig['enableWordClean.'])) {
            $HTMLparserConfig = isset($thisConfig['enableWordClean.']['HTMLparser.']) ? $thisConfig['enableWordClean.']['HTMLparser.'] : null;
            if (is_array($HTMLparserConfig)) {
                $this->keepSpanTagsWithId($HTMLparserConfig);
                $HTMLparserConfig = $HTMLParser->HTMLparserConfig($HTMLparserConfig);
            }
        }
        if (is_array($HTMLparserConfig)) {
            $html = $HTMLParser->HTMLcleaner($html, $HTMLparserConfig[0], $HTMLparserConfig[1], $HTMLparserConfig[2], $HTMLparserConfig[3]);
        }

            // Let registered hook objects post-process the cleaned content
        if (isset($TYPO3_CONF_VARS['EXTCONF'][$this->extKey][$this->prefixId]['cleanPastedContent'])
            && is_array($TYPO3_CONF_VARS['EXTCONF'][$this->extKey][$this->prefixId]['cleanPastedContent'])) {
            foreach ($TYPO3_CONF_VARS['EXTCONF'][$this->extKey][$this->prefixId]['cleanPastedContent'] as $classRef) {
                $hookObj = t3lib_div::getUserObj($classRef);
                if (is_object($hookObj) && method_exists($hookObj, 'cleanPastedContent_afterCleanWord')) {
                    $html = $hookObj->cleanPastedContent_afterCleanWord($html, $thisConfig);
                }
            }
        }
        return $html;
    }

    /**
     * Modify incoming HTMLparser config in an attempt to keep span tags with id
     * Such tags are used by the RTE in order to restore the cursor position when the cleaning operation is completed.
     *
     * @param   array       $HTMLparserConfig: incoming HTMLParser configuration (will be modified)
     * @return  void
     */
    protected function keepSpanTagsWithId(&$HTMLparserConfig) {
            // Allow span tag
        if (isset($HTMLparserConfig['allowTags'])) {
            if (!t3lib_div::inList($HTMLparserConfig['allowTags'], 'span')) {
                $HTMLparserConfig['allowTags'] .= ',span';
            }
        } else {
            $HTMLparserConfig['allowTags'] = 'span';
        }
            // Allow attributes on span tags
        if (isset($HTMLparserConfig['noAttrib']) && t3lib_div::inList($HTMLparserConfig['noAttrib'], 'span')) {
            $HTMLparserConfig['noAttrib'] = t3lib_div::rmFromList('span', $HTMLparserConfig['noAttrib']);
        }
            // Do not remove span tags
        if (isset($HTMLparserConfig['removeTags']) && t3lib_div::inList($HTMLparserConfig['removeTags'], 'span')) {
            $HTMLparserConfig['removeTags'] = t3lib_div::rmFromList('span', $HTMLparserConfig['removeTags']);
        }
            // Review the tags array
        if (!isset($HTMLparserConfig['tags.']) || !is_array($HTMLparserConfig['tags.'])) {
            return;
        }
            // Allow span tag when it is explicitly switched off
        if (isset($HTMLparserConfig['tags.']['span']) && !$HTMLparserConfig['tags.']['span']) {
            $HTMLparserConfig['tags.']['span'] = 1;
        }
        if (!isset($HTMLparserConfig['tags.']['span.']) || !is_array($HTMLparserConfig['tags.']['span.'])) {
            return;
        }
            // Make sure the id attribute is among the allowed attributes of span tags
        if (isset($HTMLparserConfig['tags.']['span.']['allowedAttribs'])) {
            if (!$HTMLparserConfig['tags.']['span.']['allowedAttribs']) {
                $HTMLparserConfig['tags.']['span.']['allowedAttribs'] = 'id';
            } elseif (!t3lib_div::inList($HTMLparserConfig['tags.']['span.']['allowedAttribs'], 'id')) {
                $HTMLparserConfig['tags.']['span.']['allowedAttribs'] .= ',id';
            }
        }
            // Do not let the configuration unset the id attribute
        if (isset($HTMLparserConfig['tags.']['span.']['fixAttrib.']['id.']['unset'])) {
            unset($HTMLparserConfig['tags.']['span.']['fixAttrib.']['id.']['unset']);
        }
    }
}
// Load a registered XCLASS extension of this class, if any, for the current TYPO3_MODE.
if (defined('TYPO3_MODE')) {
    if (isset($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['ext/rtehtmlarea/mod6/class.tx_rtehtmlarea_parse_html.php'])) {
        include_once $GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['ext/rtehtmlarea/mod6/class.tx_rtehtmlarea_parse_html.php'];
    }
}
00195 ?>