TYPO3 API  SVNRelease
class.tx_linkvalidator_processor.php
Go to the documentation of this file.
00001 <?php
00002 /***************************************************************
00003  *  Copyright notice
00004  *
00005  *  (c) 2010 - 2011 Michael Miousse (michael.miousse@infoglobe.ca)
00006  *  All rights reserved
00007  *
00008  *  This script is part of the TYPO3 project. The TYPO3 project is
00009  *  free software; you can redistribute it and/or modify
00010  *  it under the terms of the GNU General Public License as published by
00011  *  the Free Software Foundation; either version 2 of the License, or
00012  *  (at your option) any later version.
00013  *
00014  *  The GNU General Public License can be found at
00015  *  http://www.gnu.org/copyleft/gpl.html.
00016  *
00017  *  This script is distributed in the hope that it will be useful,
00018  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00019  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00020  *  GNU General Public License for more details.
00021  *
00022  *  This copyright notice MUST APPEAR in all copies of the script!
00023  ***************************************************************/
00024 
00025 /**
00026  * This class provides Processing plugin implementation.
00027  *
00028  * @author Michael Miousse <michael.miousse@infoglobe.ca>
00029  * @author Jochen Rieger <j.rieger@connecta.ag>
00030  * @package TYPO3
00031  * @subpackage linkvalidator
00032  */
00033 
// Load the labels of the report module. The call is skipped when no language
// object has been set up in $GLOBALS['LANG'], because calling a method on a
// non-object would abort the request with a fatal error.
if (isset($GLOBALS['LANG']) && is_object($GLOBALS['LANG'])) {
    $GLOBALS['LANG']->includeLLFile('EXT:linkvalidator/modfuncreport/locallang.xml');
}
00035 
class tx_linkvalidator_Processor {

    /**
     * Array of tables and fields to search for broken links
     * (table name => array of field names).
     *
     * @var array
     */
    protected $searchFields = array();

    /**
     * List of comma separated page uids (rootline downwards).
     *
     * @var string
     */
    protected $pidList = '';

    /**
     * Array of tables and the number of links that were checked in them.
     *
     * @var array
     */
    protected $linkCounts = array();

    /**
     * Array of tables and the number of broken links they contain.
     *
     * @var array
     */
    protected $brokenLinkCounts = array();

    /**
     * Array of tables and records containing broken links.
     *
     * @var array
     */
    protected $recordsWithBrokenLinks = array();

    /**
     * Hook objects performing the actual checks (link type key => hook object).
     *
     * @var array
     */
    protected $hookObjectsArr = array();

    /**
     * Array with information about the pages collected by extGetTreeList().
     * Each entry is array(uid, HTML-escaped title, remaining depth).
     *
     * @var array
     */
    protected $extPageInTreeInfo = array();

    /**
     * "recordRef" value of the soft reference that is currently being checked.
     * Declared public because it used to be created on the fly as a dynamic
     * (and therefore public) property which hook objects may read.
     *
     * @var string|NULL
     */
    public $recordReference;

    /**
     * Page together with an anchor (e.g. 18#c1580) of the link that is currently
     * being checked. Public for the same reason as $recordReference.
     *
     * @var string|NULL
     */
    public $pageWithAnchor;

    /**
     * Fill hookObjectsArr with the configured link type checkers.
     */
    public function __construct() {
        if (!isset($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['linkvalidator']['checkLinks'])
            || !is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['linkvalidator']['checkLinks'])
        ) {
            return;
        }
        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['linkvalidator']['checkLinks'] as $key => $classRef) {
            // Objects are handles, so a plain assignment is enough; "=&" on a
            // function result only raises a notice.
            $this->hookObjectsArr[$key] = t3lib_div::getUserObj($classRef);
        }
    }

    /**
     * Init Function: Here all the needed configuration values are stored in class variables.
     *
     * @param   array       $searchField: tables and their fields in which to search for links
     * @param   string      $pid: list of comma separated page uids in which to search for links
     * @return  void
     */
    public function init($searchField, $pid) {
        $this->searchFields = $searchField;
        $this->pidList = $pid;
    }

    /**
     * Find all supported broken links and store them in tx_linkvalidator_link.
     *
     * Existing entries for the configured pages (restricted to the keys of
     * $checkOptions, if any) are deleted first. Then every configured table is
     * scanned and each link found is handed to the hook object of its type.
     * Nothing is done when the page list contains no valid uid.
     *
     * @param   array       $checkOptions: link type key => flag; an empty array activates every hook object
     * @param   boolean     $considerHidden: if FALSE, the clause of t3lib_BEfunc::BEenableFields() is appended to the record query
     * @return  void
     */
    public function getLinkStatistics($checkOptions = array(), $considerHidden = FALSE) {
        if (!is_array($checkOptions)) {
            $checkOptions = array();
        }

        $pidList = $this->sanitizeUidList($this->pidList);
        if ($pidList === '') {
            // "IN ()" is not valid SQL and there is nothing to check anyway.
            return;
        }

        $linkTypeCondition = '';
        if (count($checkOptions) > 0) {
            $quotedKeys = array();
            foreach (array_keys($checkOptions) as $checkKey) {
                $quotedKeys[] = $GLOBALS['TYPO3_DB']->fullQuoteStr($checkKey, 'tx_linkvalidator_link');
            }
            $linkTypeCondition = ' AND link_type IN (' . implode(',', $quotedKeys) . ')';
        }
        $GLOBALS['TYPO3_DB']->exec_DELETEquery(
            'tx_linkvalidator_link',
            '(record_pid IN (' . $pidList . ') OR (record_uid IN (' . $pidList . ') AND table_name LIKE \'pages\'))'
                . $linkTypeCondition
        );

        $results = array();
        $searchFields = is_array($this->searchFields) ? $this->searchFields : array();

        // Traverse all configured tables.
        foreach ($searchFields as $table => $fields) {
            // A table without TCA is assumed to belong to an extension that is not installed.
            if (!isset($GLOBALS['TCA'][$table]) || !is_array($GLOBALS['TCA'][$table])) {
                continue;
            }
            if (!is_array($fields) || count($fields) === 0) {
                continue;
            }

            // Pages are matched by their own uid, all other records by their pid.
            $where = 'deleted = 0 AND ' . ($table === 'pages' ? 'uid' : 'pid') . ' IN (' . $pidList . ')';
            if (!$considerHidden) {
                $where .= t3lib_BEfunc::BEenableFields($table);
            }

            $selectFields = 'uid, pid, ' . $GLOBALS['TCA'][$table]['ctrl']['label'] . ', ' . implode(', ', $fields);

            // TODO: only select rows that have content in at least one of the relevant fields (via OR)
            $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery($selectFields, $table, $where);
            if (!$res) {
                continue;
            }
            while ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
                $this->analyzeRecord($results, $table, $fields, $row);
            }
            $GLOBALS['TYPO3_DB']->sql_free_result($res);
        }

        foreach ($this->hookObjectsArr as $key => $hookObj) {
            if (!isset($results[$key]) || !is_array($results[$key])) {
                continue;
            }
            // With explicit check options only the activated link types are checked.
            if (count($checkOptions) > 0 && empty($checkOptions[$key])) {
                continue;
            }

            $showAllLinks = (bool) t3lib_div::_GP('showalllinks');

            foreach ($results[$key] as $entryValue) {
                $table = $entryValue['table'];

                $this->recordReference = isset($entryValue['substr']['recordRef'])
                    ? $entryValue['substr']['recordRef']
                    : NULL;
                $this->pageWithAnchor = isset($entryValue['pageAndAnchor'])
                    ? $entryValue['pageAndAnchor']
                    : NULL;

                // A page with anchor (e.g. 18#c1580) takes precedence over the plain token value.
                $url = !empty($this->pageWithAnchor)
                    ? $this->pageWithAnchor
                    : $entryValue['substr']['tokenValue'];

                $this->linkCounts[$table] = (isset($this->linkCounts[$table]) ? $this->linkCounts[$table] : 0) + 1;

                $isValid = (bool) $hookObj->checkLink($url, $entryValue, $this);
                if ($isValid && !$showAllLinks) {
                    continue;
                }

                $response = array('valid' => $isValid);
                if (!$isValid) {
                    $response['errorParams'] = $hookObj->getErrorParams();
                    // Only really broken links are counted; valid links that are
                    // stored because of "showalllinks" are not.
                    $this->brokenLinkCounts[$table] =
                        (isset($this->brokenLinkCounts[$table]) ? $this->brokenLinkCounts[$table] : 0) + 1;
                }

                $record = array(
                    'headline' => $entryValue['row'][$GLOBALS['TCA'][$table]['ctrl']['label']],
                    'record_pid' => $entryValue['row']['pid'],
                    'record_uid' => $entryValue['uid'],
                    'table_name' => $table,
                    'link_title' => isset($entryValue['link_title']) ? $entryValue['link_title'] : '',
                    'field' => $entryValue['field'],
                    'last_check' => time(),
                    'link_type' => $key,
                    'url' => $url,
                    'url_response' => serialize($response),
                );
                $GLOBALS['TYPO3_DB']->exec_INSERTquery('tx_linkvalidator_link', $record);
            }
        }
    }

    /**
     * Find all links of a specific record and add them to $results.
     *
     * Only fields whose TCA configuration defines soft references are looked at.
     * Entries are stored as $results[<link type>][<table>:<field>:<uid>:<tokenID>].
     *
     * @param   array       $results: array of links found so far, extended by reference
     * @param   string      $table: table name of the record
     * @param   array       $fields: array of fields to analyze
     * @param   array       $record: record to analyse
     * @return  void
     */
    public function analyzeRecord(&$results, $table, $fields, $record) {
        $htmlParser = t3lib_div::makeInstance('t3lib_parsehtml');
        $idRecord = $record['uid'];

        // The last-parsed link element was a page. This flag intentionally
        // survives from one link tag / field to the next.
        $wasPage = TRUE;

        foreach ($fields as $field) {
            $conf = isset($GLOBALS['TCA'][$table]['columns'][$field]['config'])
                ? $GLOBALS['TCA'][$table]['columns'][$field]['config']
                : array();
            $valueField = isset($record[$field]) ? $record[$field] : '';

            // Skip fields without soft reference configuration (see TYPO3 Core API) or without content.
            if (empty($conf['softref']) || (string) $valueField === '') {
                continue;
            }

            $softRefs = t3lib_BEfunc::explodeSoftRefParserList($conf['softref']);
            if (!is_array($softRefs)) {
                continue;
            }

            foreach ($softRefs as $spKey => $spParams) {
                $softRefObj = t3lib_BEfunc::softRefParserObj($spKey);
                if (!is_object($softRefObj)) {
                    continue;
                }

                $resultArray = $softRefObj->findRef($table, $field, $idRecord, $valueField, $spKey, $spParams);
                if (empty($resultArray['elements'])) {
                    continue;
                }

                $keyPrefix = $table . ':' . $field . ':' . $idRecord . ':';
                $baseEntry = array(
                    'row' => $record,
                    'table' => $table,
                    'field' => $field,
                    'uid' => $idRecord,
                );

                if ($spKey === 'typolink_tag') {
                    $linkTags = $htmlParser->splitIntoBlock('link', $resultArray['content']);
                    $linkTagCount = count($linkTags);

                    // Only every second element, starting at index 1, is looked at
                    // (t3lib_parsehtml documents odd indexes as the block content).
                    for ($i = 1; $i < $linkTagCount; $i += 2) {
                        // Reset per link tag so that nothing leaks in from the previous tag.
                        $currentR = NULL;
                        $title = '';
                        $referencedRecordType = '';

                        foreach ($resultArray['elements'] as $element) {
                            $r = isset($element['subst']) ? $element['subst'] : NULL;
                            if (empty($r['tokenID']) || !substr_count($linkTags[$i], $r['tokenID'])) {
                                continue;
                            }
                            $recordRef = isset($r['recordRef']) ? (string) $r['recordRef'] : '';

                            if (strpos($recordRef, 'pages') !== FALSE) {
                                $currentR = $r;
                                // contains number of the page
                                $referencedRecordType = $r['tokenValue'];
                                $wasPage = TRUE;
                            } elseif (strpos($recordRef, 'tt_content') !== FALSE && $wasPage === TRUE) {
                                // append number of content element to the page saved in the last loop
                                $referencedRecordType .= '#c' . $r['tokenValue'];
                                $wasPage = FALSE;
                            } else {
                                $currentR = $r;
                            }
                            $title = strip_tags($linkTags[$i]);
                        }

                        if ($currentR === NULL) {
                            // No soft reference belongs to this tag; there is nothing to store.
                            continue;
                        }

                        $entry = $baseEntry;
                        $entry['substr'] = $currentR;
                        $entry['link_title'] = $title;
                        $entry['pageAndAnchor'] = $referencedRecordType;
                        $this->addResult(
                            $results,
                            $this->fetchLinkType($currentR),
                            $keyPrefix . (isset($currentR['tokenID']) ? $currentR['tokenID'] : ''),
                            $entry
                        );
                    }
                } else {
                    foreach ($resultArray['elements'] as $element) {
                        $r = isset($element['subst']) ? $element['subst'] : NULL;
                        if (empty($r)) {
                            continue;
                        }

                        $entry = $baseEntry;
                        $entry['substr'] = $r;
                        $this->addResult(
                            $results,
                            $this->fetchLinkType($r),
                            $keyPrefix . (isset($r['tokenID']) ? $r['tokenID'] : ''),
                            $entry
                        );
                    }
                }
            }
        }
    }

    /**
     * Ask every hook object for the link type of a soft reference.
     * Each hook object receives the type determined so far and returns the new one.
     *
     * @param   array       $reference: "subst" part of a soft reference element
     * @return  string      the link type after all hook objects have been asked
     */
    protected function fetchLinkType($reference) {
        $type = '';
        foreach ($this->hookObjectsArr as $keyArr => $hookObj) {
            $type = $hookObj->fetchType($reference, $type, $keyArr);
        }
        return $type;
    }

    /**
     * Store the data of one link in the result array. Keys of an already
     * existing entry that are not part of $entry are kept.
     *
     * @param   array       $results: result array, modified by reference
     * @param   string      $type: link type the entry belongs to
     * @param   string      $entryKey: key of the entry (table:field:uid:tokenID)
     * @param   array       $entry: data to store
     * @return  void
     */
    protected function addResult(&$results, $type, $entryKey, array $entry) {
        $existing = (isset($results[$type][$entryKey]) && is_array($results[$type][$entryKey]))
            ? $results[$type][$entryKey]
            : array();
        $results[$type][$entryKey] = array_merge($existing, $entry);
    }

    /**
     * Reduce a comma separated list to its integer entries so that it can be
     * used inside an SQL "IN (...)" expression. Empty and non-numeric parts
     * (e.g. caused by a trailing comma) are dropped.
     *
     * @param   string      $list: comma separated list of uids
     * @return  string      cleaned list without duplicates; empty string if nothing is left
     */
    protected function sanitizeUidList($list) {
        $uids = array();
        foreach (explode(',', (string) $list) as $part) {
            $part = trim($part);
            if (preg_match('/^-?[0-9]+$/', $part)) {
                $uids[] = intval($part);
            }
        }
        return implode(',', array_unique($uids));
    }

    /**
     * Fill a markerarray with the number of links stored for a list of pages.
     *
     * The page list given to init() is used; $curPage only serves as fallback
     * when that list is empty.
     *
     * @param   string     $curPage: comma separated list of page uids
     * @return  array      link type => number of stored links, plus the sum in "brokenlinkCount"; empty if nothing was found
     */
    public function getLinkCounts($curPage) {
        $markerArray = array();

        $pidList = $this->sanitizeUidList(empty($this->pidList) ? $curPage : $this->pidList);
        if ($pidList === '') {
            return $markerArray;
        }

        $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
            'count(uid) as nbBrokenLinks,link_type',
            'tx_linkvalidator_link',
            'record_pid in (' . $pidList . ')',
            'link_type'
        );
        if (!$res) {
            return $markerArray;
        }

        while (($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res))) {
            $markerArray[$row['link_type']] = $row['nbBrokenLinks'];
            $sumSoFar = isset($markerArray['brokenlinkCount']) ? $markerArray['brokenlinkCount'] : 0;
            $markerArray['brokenlinkCount'] = $sumSoFar + $row['nbBrokenLinks'];
        }
        $GLOBALS['TYPO3_DB']->sql_free_result($res);

        return $markerArray;
    }

    /**
     * Calls t3lib_tsfeBeUserAuth::extGetTreeList.
     * Although this duplicates the function t3lib_tsfeBeUserAuth::extGetTreeList
     * this is necessary to create the object that is used recursively by the original function.
     *
     * Generates a list of page uids from $id. List does not include $id itself.
     * The only pages excluded from the list are deleted pages and pages not
     * matched by the perms clause.
     *
     * @param   integer     $id: Start page id
     * @param   integer     $depth: Depth to traverse down the page tree.
     * @param   integer     $begin: optional integer that determines at which level in the tree to start collecting uids. Zero means 'start right away', 1 = 'next level and out'
     * @param   string      $permsClause: Perms clause; an empty clause matches all pages
     * @return  string      Returns the list with a comma in the end (if any pages selected!)
     */
    public function extGetTreeList($id, $depth, $begin = 0, $permsClause = '1=1') {
        $depth = intval($depth);
        $begin = intval($begin);
        $id = intval($id);
        $theList = '';

        if ($depth <= 0) {
            return $theList;
        }

        $permsClause = trim((string) $permsClause);
        if ($permsClause === '') {
            $permsClause = '1=1';
        }

        $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
            'uid,title',
            'pages',
            // The parentheses keep an "OR" inside the perms clause from breaking the pid restriction.
            'pid=' . $id . ' AND deleted=0 AND (' . $permsClause . ')'
        );
        if (!$res) {
            return $theList;
        }

        while ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
            if ($begin <= 0) {
                $theList .= $row['uid'] . ',';
                $this->extPageInTreeInfo[] = array($row['uid'], htmlspecialchars($row['title']), $depth);
            }
            if ($depth > 1) {
                $theList .= $this->extGetTreeList($row['uid'], $depth - 1, $begin - 1, $permsClause);
            }
        }
        $GLOBALS['TYPO3_DB']->sql_free_result($res);

        return $theList;
    }

}
00393 
// Give a configured XCLASS extension of this file the chance to be loaded.
if (defined('TYPO3_MODE')) {
    if (isset($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['ext/linkvalidator/classes/class.tx_linkvalidator_processor.php'])) {
        include_once $GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['ext/linkvalidator/classes/class.tx_linkvalidator_processor.php'];
    }
}
00397 ?>