|
TYPO3 API
SVNRelease
|
<?php
/***************************************************************
*  Copyright notice
*
*  (c) 2001-2011 Kasper Skårhøj (kasperYYYY@typo3.com)
*  All rights reserved
*
*  This script is part of the TYPO3 project. The TYPO3 project is
*  free software; you can redistribute it and/or modify
*  it under the terms of the GNU General Public License as published by
*  the Free Software Foundation; either version 2 of the License, or
*  (at your option) any later version.
*
*  The GNU General Public License can be found at
*  http://www.gnu.org/copyleft/gpl.html.
*  A copy is found in the textfile GPL.txt and important notices to the license
*  from the author is found in LICENSE.txt distributed with these scripts.
*
*
*  This script is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*  GNU General Public License for more details.
*
*  This copyright notice MUST APPEAR in all copies of the script!
***************************************************************/
/**
 * Index search crawler hook example
 *
 * $Id: class.crawlerhook.php 10120 2011-01-18 20:03:36Z ohader $
 *
 * @author	Kasper Skårhøj <kasperYYYY@typo3.com>
 */
/**
 * [CLASS/FUNCTION INDEX of SCRIPT]
 *
 *
 *
 *   57: class tx_indexedsearch_crawlerhook
 *   64:     function initMessage()
 *   80:     function indexOperation($cfgRec,&$session_data,$params,&$pObj)
 *
 * TOTAL FUNCTIONS: 2
 * (This index is automatically created/updated by the extension "extdeveval")
 *
 */



/**
 * Index search crawler hook example
 *
 * @package TYPO3
 * @subpackage tx_indexedsearch
 * @author	Kasper Skårhøj <kasperYYYY@typo3.com>
 */
class tx_indexedsearch_crawlerhook {

	/**
	 * Function is called when an indexing session starts according to the time intervals set for the indexing configuration.
	 *
	 * @return	string		Return a text string for the first, initiating queue entry for the crawler.
	 */
	function initMessage() {
		return 'Start of Custom Example Indexing session!';
	}

	/**
	 * Runs one instance (step) of the example indexing session. Each call does two things:
	 * 1) Carry out actual indexing of content (one or more items)
	 * 2) Add a new entry into the crawlers queue so we are called again (another instance) for further indexing in the session (only while steps remain)
	 * Exactly one step runs per call, so every case below sets up its own rootline and language uid.
	 *
	 * @param	array		Indexing Configuration Record (the record which holds the information that lead to this indexing session...)
	 * @param	array		Session data variable. Passed by reference. Changed content is saved and passed back upon next instance in the session.
	 * @param	array		Params array from the queue entry.
	 * @param	object		Parent Object; must offer loadIndexerClass(), getUidRootLineForClosestTemplate() and addQueueEntryForHook()
	 * @return	void
	 */
	function indexOperation($cfgRec,&$session_data,$params,&$pObj) {

			// Init session data array if not already:
		if (!is_array($session_data)) {
			$session_data = array(
				'step' => 0
			);
		}

			// Increase step counter (this is just an example of how the session data can be used - to track how many instances of indexing is left)
		$session_data['step']++;


		switch((int)$session_data['step']) {
			case 1:	// Indexing Example: Content accessed with GET parameters added to URL:

					// Load indexer if not yet [DON'T CHANGE]:
				$pObj->loadIndexerClass();

					// Get rootline from the Indexing Record (needed because the indexer relates all search results to a position in the page tree!) [DON'T CHANGE]:
				$rl = $pObj->getUidRootLineForClosestTemplate($cfgRec['pid']);

					// Set up language uid, if any:
				$sys_language_uid = 0;

					// Set up 2 example items to index:
				$exampleItems = array(
					array(
						'ID' => '123',
						'title' => 'Title of Example 1',
						'content' => 'Vestibulum leo turpis, fringilla sit amet, semper eget, vestibulum ut, arcu. Vestibulum mauris orci, vulputate quis, congue eget, nonummy'
					),
					array(
						'ID' => 'example2',
						'title' => 'Title of Example 2',
						'content' => 'Cras tortor turpis, vulputate non, accumsan a, pretium in, magna. Cras turpis turpis, pretium pulvinar, pretium vel, nonummy eu.'
					)
				);

					// For each item, index it (this is what you might like to do in batches of like 100 items if all your content spans thousands of items!)
				foreach($exampleItems as $item)	{

						// Prepare the GET variables array that must be added to the page URL in order to view result:
					parse_str('&itemID='.rawurlencode($item['ID']), $GETparams);

						// Prepare indexer (make instance, initialize it, set special features for indexing parameterized content - probably none of this should be changed by you) [DON'T CHANGE]:
					$indexerObj = t3lib_div::makeInstance('tx_indexedsearch_indexer');
					$indexerObj->backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl, $GETparams, FALSE);
					$indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
					$indexerObj->forceIndexing = TRUE;

						// Indexing the content of the item (see tx_indexedsearch_indexer::backend_indexAsTYPO3Page() for options)
					$indexerObj->backend_indexAsTYPO3Page(
							$item['title'],
							'',
							'',
							$item['content'],
							$GLOBALS['LANG']->charSet,	// Charset of content - MUST be set.
							isset($item['tstamp']) ? $item['tstamp'] : 0,	// Last-modified date (the example items above carry none, so fall back to 0 instead of reading an undefined key)
							isset($item['create_date']) ? $item['create_date'] : 0,	// Created date (same fallback)
							$item['ID']
						);
				}
			break;
			case 2:	// Indexing Example: Content accessed directly in file system:

					// Load indexer if not yet [DON'T CHANGE]:
				$pObj->loadIndexerClass();

					// Get rootline from the Indexing Record (needed because the indexer relates all search results to a position in the page tree!) [DON'T CHANGE]:
				$rl = $pObj->getUidRootLineForClosestTemplate($cfgRec['pid']);

					// Set up language uid, if any:
				$sys_language_uid = 0;

					// Prepare indexer (make instance, initialize it, set special features for indexing parameterized content - probably none of this should be changed by you) [DON'T CHANGE]:
				$indexerObj = t3lib_div::makeInstance('tx_indexedsearch_indexer');
				$indexerObj->backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl);
				$indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
				$indexerObj->hash['phash'] = -1;	// To avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!)

					// Index document:
				$indexerObj->indexRegularDocument('fileadmin/templates/index.html', TRUE);
			break;
			case 3:	// Indexing Example: Content accessed on External URLs:

					// Load indexer if not yet.
				$pObj->loadIndexerClass();

					// Get rootline from the Indexing Record. This step runs in its own call, so nothing assigned in steps 1 or 2 is available here:
				$rl = $pObj->getUidRootLineForClosestTemplate($cfgRec['pid']);

					// Set up language uid, if any:
				$sys_language_uid = 0;

					// Index external URL:
				$indexerObj = t3lib_div::makeInstance('tx_indexedsearch_indexer');
				$indexerObj->backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl);
				$indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
				$indexerObj->hash['phash'] = -1;	// To avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!)

					// Index external URL (HTML only):
				$indexerObj->indexExternalUrl('http://www.google.com/');
			break;
		}

			// Finally, set entry for next indexing instance (only while steps remain - after step 3 there is nothing left to do, so no further entry is queued)
		if ($session_data['step'] < 3) {
			$title = 'Step #'.($session_data['step']+1).' of 3';	// Just information field, naming the step the queued instance will carry out. Never mind that the field is called "url" - this is what will be shown in the "crawler" log. Could be a URL - or whatever else tells what that indexing instance will do.
			$pObj->addQueueEntryForHook($cfgRec, $title);
		}
	}
}


if (defined('TYPO3_MODE') && isset($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['ext/indexed_search/example/class.crawlerhook.php'])) {
	include_once($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['ext/indexed_search/example/class.crawlerhook.php']);
}

?>
1.8.0