00001 <?php
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084 require_once(t3lib_extMgm::extPath('indexed_search').'class.indexer.php');
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094 class tx_indexedsearch_modfunc1 extends t3lib_extobjbase {
00095
00096
/**
 * @var array phash values of all rows listed so far; appended to while the index table is rendered
 */
var $allPhashListed = array();

/**
 * @var array External parser objects keyed by file extension; filled in main() from the extension configuration
 */
var $external_parsers = array();

/**
 * @var array Cache of rendered icon <img> tags keyed by item type (see makeItemTypeIcon())
 */
var $iconFileNameCache = array();

/**
 * @var integer Maximum number of index rows listed per page; main() raises it when "listALL" is requested.
 *              Declared here so the property exists before main() assigns it.
 */
var $maxListPerPage = 100;

/**
 * @var tx_indexedsearch_indexer Indexer instance created in main(); used for metaphone lookups
 */
var $indexerObj;
00107
00108
00109
00110
00111
00112
00113
/**
 * Builds the function menu definitions of this module function.
 *
 * The "depth" labels are fetched from the core language file, the "type"
 * labels are fixed English strings.
 *
 * @return array Menu item arrays keyed by setting name ("depth" and "type")
 */
function modMenu() {
	$languageService = $GLOBALS['LANG'];
	$labelPrefix = 'LLL:EXT:lang/locallang_core.php:labels.depth_';

	// Depth value => suffix of the language label
	$depthLabels = array(0 => '0', 1 => '1', 2 => '2', 3 => '3', 999 => 'infi');

	$depthMenu = array();
	foreach ($depthLabels as $depth => $labelSuffix) {
		$depthMenu[$depth] = $languageService->sL($labelPrefix . $labelSuffix);
	}

	$typeMenu = array(
		0 => 'Overview',
		1 => 'Technical Details',
		2 => 'Words and content',
	);

	return array(
		'depth' => $depthMenu,
		'type' => $typeMenu,
	);
}
00132
00133
00134
00135
00136
00137
/**
 * Entry point of the module function: processes incoming commands and renders the output.
 *
 * Handles the "deletePhash", "_stopwords" and "_pageKeywords" commands first, then
 * initializes external parsers and the indexer object, and finally renders either a
 * detail view (phash / word / metaphone / re-index) or the table of indexed pages.
 *
 * @return string|NULL HTML output; nothing is returned when no page is selected
 */
function main() {
	global $LANG,$TYPO3_CONF_VARS;

	// Return if no page id:
	if ($this->pObj->id<=0) return;

	// Initialize max-list items
	$this->maxListPerPage = t3lib_div::_GP('listALL') ? 100000 : 100;

	// Processing deletion of phash rows:
	if (t3lib_div::_GP('deletePhash')) {
		$this->removeIndexedPhashRow(t3lib_div::_GP('deletePhash'));
	}

	// Processing stop-words:
	if (t3lib_div::_POST('_stopwords')) {
		$this->processStopWords(t3lib_div::_POST('stopWord'));
	}

	// Processing page keywords:
	if (t3lib_div::_POST('_pageKeywords')) {
		$this->processPageKeywords(t3lib_div::_POST('pageKeyword'), t3lib_div::_POST('pageKeyword_pageUid'));
	}

	// Initialize external document parsers; only parsers that could be instantiated
	// and whose softInit() succeeds are kept.
	if (isset($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers']) && is_array($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers'])) {
		foreach($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers'] as $extension => $_objRef) {
			$parserObj = t3lib_div::getUserObj($_objRef);

			// Guard against a misconfigured object reference before calling a method on it
			if (is_object($parserObj) && $parserObj->softInit($extension)) {
				$this->external_parsers[$extension] = $parserObj;
			} else {
				unset($this->external_parsers[$extension]);
			}
		}
	}

	// Initialize indexer if we need it (metaphone display does...)
	$this->indexerObj = t3lib_div::makeInstance('tx_indexedsearch_indexer');

	// Inject the list styling at the CSS marker of the document content.
	// An empty search string makes str_replace() a no-op, so the marker must be named explicitly.
	// NOTE(review): marker name assumed to be the post-CSS marker of the backend document template - confirm.
	$this->pObj->content = str_replace('/*###POSTCSSMARKER###*/','
TABLE.c-list TR TD { white-space: nowrap; vertical-align: top; }
/*###POSTCSSMARKER###*/',$this->pObj->content);

	// Output is built up by concatenation, so start from a defined empty string
	$theOutput = '';

	// Check if details for a phash record should be shown:
	if (t3lib_div::_GET('phash')) {

		// Show title / function menu:
		$theOutput.=$this->pObj->doc->spacer(5);
		$theOutput.=$this->pObj->doc->section('Details for a single result row:',$this->showDetailsForPhash(t3lib_div::_GET('phash')),0,1);

	} elseif (t3lib_div::_GET('wid')) {

		$theOutput.=$this->pObj->doc->spacer(5);
		$theOutput.=$this->pObj->doc->section('Details for a word:',$this->showDetailsForWord(t3lib_div::_GET('wid')),0,1);

	} elseif (t3lib_div::_GET('metaphone')) {

		$theOutput.=$this->pObj->doc->spacer(5);
		$theOutput.=$this->pObj->doc->section('Details for metaphone value:',$this->showDetailsForMetaphone(t3lib_div::_GET('metaphone')),0,1);

	} elseif (t3lib_div::_GET('reindex')) {

		$theOutput.=$this->pObj->doc->spacer(5);
		$theOutput.=$this->pObj->doc->section('Reindexing...',$this->reindexPhash(t3lib_div::_GET('reindex'),t3lib_div::_GET('reindex_id')),0,1);

	} else {
		// Detail listings: depth and type menus
		$h_func = t3lib_BEfunc::getFuncMenu($this->pObj->id,'SET[type]',$this->pObj->MOD_SETTINGS['type'],$this->pObj->MOD_MENU['type'],'index.php');
		$h_func.= t3lib_BEfunc::getFuncMenu($this->pObj->id,'SET[depth]',$this->pObj->MOD_SETTINGS['depth'],$this->pObj->MOD_MENU['depth'],'index.php');

		// Show title / function menu:
		$theOutput.=$this->pObj->doc->spacer(5);
		$theOutput.=$this->pObj->doc->section($LANG->getLL('title'),$h_func,0,1);

		$theOutput.=$this->drawTableOfIndexedPages();
	}

	return $theOutput;
}
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
00239
00240
00241
00242
/**
 * Renders the table of indexed pages for the current page and, depending on
 * the "depth" module setting, the pages below it.
 *
 * As a side effect the phash values of all listed rows are collected in
 * $this->allPhashListed (done by indexed_info()).
 *
 * @return string HTML section with the table or a "nothing found" message
 */
function drawTableOfIndexedPages() {
	global $BACK_PATH;

	// Drawing tree:
	$tree = t3lib_div::makeInstance('t3lib_pageTree');
	$perms_clause = $GLOBALS['BE_USER']->getPagePermsClause(1);
	$tree->init('AND '.$perms_clause);

	// The current page is always the first entry
	$HTML = '<img src="'.$BACK_PATH.t3lib_iconWorks::getIcon('pages',$this->pObj->pageinfo).'" width="18" height="16" align="top" alt="" />';
	$tree->tree[] = Array(
		'row' => $this->pObj->pageinfo,
		'HTML' => $HTML
	);

	if ($this->pObj->MOD_SETTINGS['depth']) {
		$tree->getTree($this->pObj->id, $this->pObj->MOD_SETTINGS['depth'], '');
	}

	// Traverse page tree:
	$code = '';
	foreach($tree->tree as $data) {
		$code.= $this->indexed_info(
			$data['row'],
			$data['HTML'].
				$this->showPageDetails(t3lib_BEfunc::getRecordTitlePrep($data['row']['title']), $data['row']['uid'])
		);
	}

	// The result is built by concatenation, so start from a defined empty string
	$theOutput = '';

	if ($code) {
		$code = '<br /><br />
<table border="0" cellspacing="1" cellpadding="2" class="c-list">'.
			$this->printPhashRowHeader().
			$code.
			'</table>';

		// Create section to output:
		$theOutput.=$this->pObj->doc->section('',$code,0,1);
	} else {
		$theOutput .= $this->pObj->doc->section('', '<br /><br />' . $this->pObj->doc->icons(1) . 'There were no indexed pages found in the tree.<br /><br />', 0, 1);
	}

	return $theOutput;
}
00286
00287
00288
00289
00290
00291
00292
00293
/**
 * Renders the table rows with index information for one page record.
 *
 * Lists the index_phash rows that are linked to the page through index_section
 * and, below them, warning rows for index_phash records of the page that have
 * no index_section entry. The phash of every listed row is appended to
 * $this->allPhashListed.
 *
 * @param array $data Page record; "uid" is used for the lookup, an optional "_CSSCLASS" is applied to the first cell
 * @param string $firstColContent HTML for the first column
 * @return string HTML table rows
 */
function indexed_info($data, $firstColContent) {
	$pageUid = intval($data['uid']);

	// Query (one row more than the page limit, to detect that there is more to show):
	$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
		'ISEC.phash_t3, ISEC.rl0, ISEC.rl1, ISEC.rl2, ISEC.page_id, ISEC.uniqid, ' .
		'IP.phash, IP.phash_grouping, IP.cHashParams, IP.data_filename, IP.data_page_id, ' .
		'IP.data_page_reg1, IP.data_page_type, IP.data_page_mp, IP.gr_list, IP.item_type, ' .
		'IP.item_title, IP.item_description, IP.item_mtime, IP.tstamp, IP.item_size, ' .
		'IP.contentHash, IP.crdate, IP.parsetime, IP.sys_language_uid, IP.item_crdate, ' .
		'IP.externalUrl, IP.recordUid, IP.freeIndexUid, IP.freeIndexSetId, count(*) AS count_val',
		'index_phash IP, index_section ISEC',
		'IP.phash = ISEC.phash AND ISEC.page_id = '.$pageUid,
		'IP.phash,IP.phash_grouping,IP.cHashParams,IP.data_filename,IP.data_page_id,IP.data_page_reg1,IP.data_page_type,IP.data_page_mp,IP.gr_list,IP.item_type,IP.item_title,IP.item_description,IP.item_mtime,IP.tstamp,IP.item_size,IP.contentHash,IP.crdate,IP.parsetime,IP.sys_language_uid,IP.item_crdate,ISEC.phash,ISEC.phash_t3,ISEC.rl0,ISEC.rl1,ISEC.rl2,ISEC.page_id,ISEC.uniqid,IP.externalUrl,IP.recordUid,IP.freeIndexUid,IP.freeIndexSetId',
		'IP.item_type, IP.tstamp',
		($this->maxListPerPage+1)
	);

	// Initialize variables:
	$rowCount = 0;
	$lines = array();
	$phashAcc = array();
	$phashAcc[] = 0;	// Keeps the NOT IN () list below non-empty

	// Collecting HTML rows here:
	while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
		if ($rowCount == $this->maxListPerPage) {
			$rowCount++;	// Increase to the extra warning row will appear as well.
			break;
		}

		// Adds a display row:
		$lines[$row['phash_grouping']][] = $this->printPhashRow(
			$row,
			isset($lines[$row['phash_grouping']]),
			$this->getGrListEntriesForPhash($row['phash'], $row['gr_list'])
		);
		$rowCount++;
		// Cast, because the value is inserted into SQL below
		$phashAcc[] = intval($row['phash']);
		$this->allPhashListed[] = $row['phash'];
	}
	if ($res) {
		$GLOBALS['TYPO3_DB']->sql_free_result($res);
	}

	// Compile rows into the table:
	$out = '';
	$cellAttrib = !empty($data['_CSSCLASS']) ? ' class="'.htmlspecialchars($data['_CSSCLASS']).'"' : '';
	if (count($lines)) {
		$firstColContent = '<td rowspan="'.$rowCount.'"'.$cellAttrib.'>'.$firstColContent.'</td>';
		foreach($lines as $rowSet) {
			foreach($rowSet as $rows) {
				$out.='
<tr class="bgColor-20">'.$firstColContent.implode('',$rows).'</tr>';

				// The first column spans all rows and is only output once
				$firstColContent = '';
			}
		}

		if ($rowCount > $this->maxListPerPage) {
			$out.='
<tr class="bgColor-20">
<td> </td>
<td colspan="'.($this->returnNumberOfColumns()-1).'">'.$this->pObj->doc->icons(3).'<span class="">There were more than '.$this->maxListPerPage.' rows. <a href="'.htmlspecialchars('index.php?id='.$this->pObj->id.'&listALL=1').'">Click here to list them ALL!</a></span></td>
</tr>';
		}
	} else {
		$out.='
<tr class="bgColor-20">
<td'.$cellAttrib.'>'.$firstColContent.'</td>
<td colspan="'.($this->returnNumberOfColumns()-1).'"><em>Not indexed</em></td>
</tr>';
	}

	// Checking for phash-rows which are NOT joined with the section table:
	$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('IP.*', 'index_phash IP', 'IP.data_page_id = '.$pageUid.' AND IP.phash NOT IN ('.implode(',',$phashAcc).')');
	while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
		$out.='
<tr class="typo3-red">
<td colspan="'.$this->returnNumberOfColumns().'"><strong>Warning:</strong> phash-row "'.$row['phash'].'" didn\'t have a representation in the index_section table!</td>
</tr>';
		$this->allPhashListed[] = $row['phash'];
	}
	if ($res) {
		$GLOBALS['TYPO3_DB']->sql_free_result($res);
	}

	return $out;
}
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
/**
 * Renders the table cells of one index_phash row for the selected display type.
 *
 * The first three cells (type icon, title, command links) are the same for all
 * display types; the remaining cells depend on the "type" module setting
 * (1: technical details, 2: words and content, otherwise: overview).
 *
 * @param array $row Joined index_phash / index_section row (including "count_val")
 * @param boolean $grouping If set, the row is not the first of its phash_grouping and the icon cell is left blank
 * @param array $extraGrListRows Additional gr_list rows for the phash; passed on to printExtraGrListRows()
 * @return array List of "<td>...</td>" strings
 */
function printPhashRow($row,$grouping=0,$extraGrListRows=array()) {
	$lines = array();

	// Title cell attributes - general:
	$titleCellAttribs = $row['count_val']!=1?' bgcolor="red"':($row['item_type']==='0' ? ' class="bgColor4"' : '');

	// For non-page items the title gets a " [key]" suffix taken from the stored parameters
	$page = '';
	if ($row['item_type']) {
		$arr = unserialize($row['cHashParams']);
		if (is_array($arr) && !empty($arr['key'])) {
			$page = ' ['.$arr['key'].']';
		}
	}
	$elTitle = $this->linkDetails($row['item_title'] ? htmlspecialchars(t3lib_div::fixed_lgd_cs($this->utf8_to_currentCharset($row['item_title']), 20).$page) : '<em>[No Title]</em>',$row['phash']);
	$cmdLinks = $this->printRemoveIndexed($row['phash'],'Clear phash-row').$this->printReindex($row,'Re-index element');

	// Cells shared by all display types: icon (first row of a group only), title, remove/re-index links
	if (!$grouping) {
		$lines[] = '<td>'.$this->makeItemTypeIcon($row['item_type'], $row['data_filename'] ? $row['data_filename'] : $row['item_title']).'</td>';
	} else {
		$lines[] = '<td> </td>';
	}
	$lines[] = '<td'.$titleCellAttribs.'>'.$elTitle.'</td>';
	$lines[] = '<td>'.$cmdLinks.'</td>';

	switch($this->pObj->MOD_SETTINGS['type']) {
		case 1:
			// Query:
			$lines[] = '<td>'.$row['phash'].'</td>';
			$lines[] = '<td>'.$row['contentHash'].'</td>';

			if ($row['item_type']==='0') {
				$lines[] = '<td>'.($row['data_page_id'] ? $row['data_page_id'] : ' ').'</td>';
				$lines[] = '<td>'.($row['data_page_type'] ? $row['data_page_type'] : ' ').'</td>';
				$lines[] = '<td>'.($row['sys_language_uid'] ? $row['sys_language_uid'] : ' ').'</td>';
				$lines[] = '<td>'.($row['data_page_mp'] ? htmlspecialchars($row['data_page_mp']) : ' ').'</td>';
			} else {
				$lines[] = '<td colspan="4">'.htmlspecialchars($row['data_filename']).'</td>';
			}
			$lines[] = '<td>'.$row['gr_list'].$this->printExtraGrListRows($extraGrListRows).'</td>';
			$lines[] = '<td>'.$this->printRootlineInfo($row).'</td>';
			$lines[] = '<td>'.($row['page_id'] ? $row['page_id'] : ' ').'</td>';
			$lines[] = '<td>'.($row['phash_t3']!=$row['phash'] ? $row['phash_t3'] : ' ').'</td>';
			$lines[] = '<td>'.($row['freeIndexUid'] ? $row['freeIndexUid'].($row['freeIndexSetId']?'/'.$row['freeIndexSetId']:'') : ' ').'</td>';
			$lines[] = '<td>'.($row['recordUid'] ? $row['recordUid'] : ' ').'</td>';

			// cHashParams holds the serialized parameters; if it cannot be decoded,
			// an error label is shown in the cHash column instead.
			$arr = unserialize($row['cHashParams']);
			if (is_array($arr)) {
				$theCHash = isset($arr['cHash']) ? htmlspecialchars($arr['cHash']) : '';
				unset($arr['cHash']);
			} else {
				$arr = array();
				$theCHash = $GLOBALS['LANG']->sL('LLL:EXT:lang/locallang_general.xml:LGL.error', true);
			}

			if ($row['item_type']) {	// pdf...
				$lines[] = '<td>'.(!empty($arr['key']) ? 'Page '.htmlspecialchars($arr['key']) : '').' </td>';
			} elseif ($row['item_type']==0) {
				$lines[] = '<td>'.htmlspecialchars(t3lib_div::implodeArrayForUrl('',$arr)).' </td>';
			} else {
				$lines[] = '<td class="bgColor"> </td>';
			}

			$lines[] = '<td>'.$theCHash.'</td>';
		break;
		case 2:
			// Content (the fulltext row may be missing; then an empty text with size 0 is shown):
			$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
				'*',
				'index_fulltext',
				'phash = '.intval($row['phash'])
			);
			$fulltext = (is_array($ftrows) && isset($ftrows[0]['fulltextdata'])) ? $ftrows[0]['fulltextdata'] : '';
			$lines[] = '<td style="white-space: normal;">'.
				htmlspecialchars(t3lib_div::fixed_lgd_cs($this->utf8_to_currentCharset($fulltext),3000)).
				'<hr/><em>Size: '.strlen($fulltext).'</em>'.
				'</td>';

			// Words (result rows are keyed by "baseword", so the keys are the word list):
			$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
				'index_words.baseword, index_rel.*',
				'index_rel, index_words',
				'index_rel.phash = '.intval($row['phash']).
					' AND index_words.wid = index_rel.wid',
				'',
				'',
				'',
				'baseword'
			);

			$wordList = '';
			if (is_array($ftrows)) {
				$indexed_words = array_keys($ftrows);
				sort($indexed_words);
				$wordList = htmlspecialchars($this->utf8_to_currentCharset(implode(' ',$indexed_words)));
				$wordList.='<hr/><em>Count: '.count($indexed_words).'</em>';
			}

			$lines[] = '<td style="white-space: normal;">'.$wordList.'</td>';
		break;
		default:
			// Overview: description, size and index time
			$lines[] = '<td style="white-space: normal;">'.htmlspecialchars($this->utf8_to_currentCharset($row['item_description'])).'...</td>';
			$lines[] = '<td>'.t3lib_div::formatSize($row['item_size']).'</td>';
			$lines[] = '<td>'.t3lib_BEfunc::dateTimeAge($row['tstamp']).'</td>';
		break;
	}

	return $lines;
}
00524
00525
00526
00527
00528
00529
/**
 * Renders the header row of the index table for the selected display type.
 *
 * The first four cells are the same for all display types; the remaining
 * cells depend on the "type" module setting.
 *
 * @return string HTML table row
 */
function printPhashRowHeader() {
	// Header cells shared by all display types
	$lines = array(
		'<td> </td>',
		'<td> </td>',
		'<td>Title</td>',
		'<td bgcolor="red">'.$this->printRemoveIndexed('ALL','Clear ALL phash-rows below!').'</td>',
	);

	switch($this->pObj->MOD_SETTINGS['type']) {
		case 1:
			// Technical details; literal ampersands are written as entities to keep the markup valid
			$lines[] = '<td>pHash</td>';
			$lines[] = '<td>contentHash</td>';
			$lines[] = '<td>&amp;id</td>';
			$lines[] = '<td>&amp;type</td>';
			$lines[] = '<td>&amp;L</td>';
			$lines[] = '<td>&amp;MP</td>';
			$lines[] = '<td>grlist</td>';
			$lines[] = '<td>Rootline</td>';
			$lines[] = '<td>page_id</td>';
			$lines[] = '<td>phash_t3</td>';
			$lines[] = '<td>CfgUid</td>';
			$lines[] = '<td>RecUid</td>';
			$lines[] = '<td>GET-parameters</td>';
			$lines[] = '<td>&amp;cHash</td>';
		break;
		case 2:
			// Words and content
			$lines[] = '<td>Content<br />
<img src="clear.gif" width="300" height="1" alt="" /></td>';
			$lines[] = '<td>Words<br />
<img src="clear.gif" width="300" height="1" alt="" /></td>';
		break;
		default:
			// Overview
			$lines[] = '<td>Description</td>';
			$lines[] = '<td>Size</td>';
			$lines[] = '<td>Indexed:</td>';
		break;
	}

	$out = '<tr class="tableheader bgColor5">'.implode('',$lines).'</tr>';
	return $out;
}
00579
00580
00581
00582
00583
00584
/**
 * Tells how many columns the index table has for the selected display type.
 *
 * @return integer 18 for type 1, 6 for type 2, otherwise 7
 */
function returnNumberOfColumns() {
	$displayType = $this->pObj->MOD_SETTINGS['type'];

	// Loose comparison on purpose: the setting may be a numeric string
	if ($displayType == 1) {
		return 18;
	}
	if ($displayType == 2) {
		return 6;
	}
	return 7;
}
00598
00599
00600
00601
00602
00603
00604
00605
00606
00607
00608
00609
00610
00611
00612
00613
00614
00615
00616
00617
00618
00619
00620
/**
 * Renders the detail view for a single index_phash record: the raw row, the
 * stored debug information, word statistics and the related section records.
 *
 * @param integer $phash phash value of the record to show
 * @return string HTML content, or an error message if no such record exists
 */
function showDetailsForPhash($phash) {
	$phash = intval($phash);
	$content = '';

	// Selects the result row:
	$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
		'*',
		'index_phash',
		'phash = '.$phash
	);
	$phashRecord = (is_array($ftrows) && isset($ftrows[0])) ? $ftrows[0] : NULL;

	// If found, display:
	if (is_array($phashRecord)) {
		$content.= '<h4>phash row content:</h4>'.
			$this->utf8_to_currentCharset(t3lib_div::view_array($phashRecord));

		// Getting debug information if there is:
		$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
			'*',
			'index_debug',
			'phash = '.$phash
		);
		if (is_array($ftrows)) {
			// The debug row may be missing or undecodable; fall back to empty data in that case
			$debugInfo = isset($ftrows[0]['debuginfo']) ? unserialize($ftrows[0]['debuginfo']) : array();
			if (!is_array($debugInfo)) {
				$debugInfo = array();
			}
			$lexer = isset($debugInfo['lexer']) ? $debugInfo['lexer'] : '';
			unset($debugInfo['lexer']);

			$content.= '<h3>Debug information:</h3>'.
				$this->utf8_to_currentCharset(t3lib_div::view_array($debugInfo));

			$content.= '<h4>Debug information / lexer splitting:</h4>'.
				'<hr/><strong>'.
				$this->utf8_to_currentCharset($lexer).
				'</strong><hr/>';
		}

		$content.='<h3>Word statistics</h3>';

		// Finding all words for this phash:
		$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
			'index_words.*, index_rel.*',
			'index_rel, index_words',
			'index_rel.phash = '.$phash.
				' AND index_words.wid = index_rel.wid',
			'',
			'index_words.baseword',
			''
		);
		if (!is_array($ftrows)) {
			$ftrows = array();
		}
		$pageRec = t3lib_BEfunc::getRecord('pages', $phashRecord['data_page_id']);
		$showStopWordCheckBox = $GLOBALS['BE_USER']->isAdmin();
		$content.= $this->listWords($ftrows, 'All words found on page ('.count($ftrows).'):', $showStopWordCheckBox, $pageRec);

		// Group the words by metaphone hash:
		$metaphone = array();
		foreach($ftrows as $row) {
			$metaphone[$row['metaphone']][] = $row['baseword'];
		}
		$content.= $this->listMetaphoneStat($metaphone, 'Metaphone stats:');

		// Finding top-20 on frequency for this phash:
		$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
			'index_words.baseword, index_words.metaphone, index_rel.*',
			'index_rel, index_words',
			'index_rel.phash = '.$phash.
				' AND index_words.wid = index_rel.wid
AND index_words.is_stopword=0',
			'',
			'index_rel.freq DESC',
			'20'
		);
		$content.= $this->listWords($ftrows, 'Top-20 words by frequency:', 2);

		// Finding top-20 on count for this phash:
		$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
			'index_words.baseword, index_words.metaphone, index_rel.*',
			'index_rel, index_words',
			'index_rel.phash = '.$phash.
				' AND index_words.wid = index_rel.wid
AND index_words.is_stopword=0',
			'',
			'index_rel.count DESC',
			'20'
		);
		$content.= $this->listWords($ftrows, 'Top-20 words by count:', 2);

		$content.='<h3>Section records for this phash</h3>';

		// Finding sections for this record:
		$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
			'*',
			'index_section',
			'index_section.phash = '.$phash,
			'',
			'',
			''
		);
		$content.= t3lib_div::view_array($ftrows);

		// Add go-back link above and below the details:
		$content = $this->linkList().$content.$this->linkList();

	} else $content.= 'Error: No phash row found';

	return $content;
}
00730
00731
00732
00733
00734
00735
00736
00737
00738
00739
/**
 * Renders a table of words with count / first / frequency / flags columns and,
 * optionally, stop-word checkboxes and page-keyword checkboxes.
 *
 * @param array $ftrows Word rows (joined index_words / index_rel)
 * @param string $header Header text shown above the table
 * @param mixed $stopWordBoxes If set, stop-word checkboxes are shown. With the value 2 no hidden
 *                             "0" fallback field is added for the checkboxes.
 * @param mixed $page Page record (array) to enable the page-keyword column; anything else disables it
 * @return string HTML content
 */
function listWords($ftrows,$header, $stopWordBoxes=FALSE, $page='') {

	// Prepare keywords:
	$keywords = is_array($page) ? array_flip(t3lib_div::trimExplode(',',$page['keywords'], 1)) : '';
	$showKeywords = is_array($keywords);

	// The hidden fallback field is left out only in mode 2. The cast is needed because a
	// loose comparison of boolean TRUE with 2 is true and would suppress the field as well,
	// making it impossible to submit an unchecked stop-word box.
	$addHiddenStopWordField = intval($stopWordBoxes) !== 2;

	// Render list:
	$trows = '';
	$trows.= '
<tr class="tableheader bgColor5">
'.($stopWordBoxes ? '<td>'.htmlspecialchars('Stopword:').'</td>' : '').'
<td>'.htmlspecialchars('Word:').'</td>
<td>'.htmlspecialchars('Count:').'</td>
<td>'.htmlspecialchars('First:').'</td>
<td>'.htmlspecialchars('Frequency:').'</td>
<td>'.htmlspecialchars('Flags:').'</td>
'.($showKeywords ? '<td>'.htmlspecialchars('Page keyword:').'</td>' : '').'
</tr>
';
	if (is_array($ftrows)) {
		foreach($ftrows as $row) {
			// Not every caller selects "is_stopword"
			$isStopWord = !empty($row['is_stopword']);
			$stopWordField = 'stopWord['.htmlspecialchars($row['wid']).']';
			// The word ends up inside an attribute value, so it must be escaped
			$keywordField = 'pageKeyword['.htmlspecialchars($row['baseword']).']';
			$isKeyword = $showKeywords && isset($keywords[$row['baseword']]);

			$hiddenField = $addHiddenStopWordField ? '<input type="hidden" name="'.$stopWordField.'" value="0" />' : '';
			$trows.= '
<tr class="'.($isStopWord ? 'bgColor' : 'bgColor4').'">
'.($stopWordBoxes ? '<td align="center"'.($isStopWord ? ' style="background-color:red;"' : '').'>'.$hiddenField.'<input type="checkbox" name="'.$stopWordField.'" value="1"'.($isStopWord ? ' checked="checked"' : '').' /></td>' : '').'
<td>'.$this->linkWordDetails(htmlspecialchars($this->utf8_to_currentCharset($row['baseword'])), $row['wid']).'</td>
<td>'.htmlspecialchars($row['count']).'</td>
<td>'.htmlspecialchars($row['first']).'</td>
<td>'.htmlspecialchars($row['freq']).'</td>
<td>'.htmlspecialchars($this->flagsMsg($row['flags'])).'</td>
'.($showKeywords ? '<td align="center"'.($isKeyword ? ' class="bgColor2"' : '').'><input type="hidden" name="'.$keywordField.'" value="0" /><input type="checkbox" name="'.$keywordField.'" value="1"'.($isKeyword ? ' checked="checked"' : '').' /></td>' : '').'
</tr>
';
		}
	}

	return '<h4>'.htmlspecialchars($header).'</h4>'.
		'
<table border="0" cellspacing="1" cellpadding="2" class="c-list">
'.$trows.'
</table>'.
		($stopWordBoxes ? '<input type="submit" value="Change stop-word settings" name="_stopwords" onclick="document.webinfoForm.action=\''.htmlspecialchars(t3lib_div::getIndpEnv('REQUEST_URI')).'\';" />' : '').
		($showKeywords ? '<input type="submit" value="Set page keywords" name="_pageKeywords" onclick="document.webinfoForm.action=\''.htmlspecialchars(t3lib_div::getIndpEnv('REQUEST_URI')).'\';" /><input type="hidden" name="pageKeyword_pageUid" value="'.intval($page['uid']).'" />'.
			'<br />Current keywords are: <em>' . htmlspecialchars(implode(', ', array_keys($keywords))) . '</em>' : '');
}
00782
00783
00784
00785
00786
00787
00788
00789
/**
 * Renders a table of metaphone hashes that are shared by more than one word.
 *
 * @param array $ftrows Lists of words keyed by metaphone hash
 * @param string $header Header text shown above the table
 * @return string HTML content
 */
function listMetaphoneStat($ftrows,$header) {

	// Header row:
	$tableRows = '
<tr class="tableheader bgColor5">
<td>'.htmlspecialchars('Metaphone:').'</td>
<td>'.htmlspecialchars('Hash:').'</td>
<td>'.htmlspecialchars('Count:').'</td>
<td>'.htmlspecialchars('Words:').'</td>
</tr>
';

	foreach ($ftrows as $metaphoneHash => $wordList) {
		$wordCount = count($wordList);

		// A hash with a single word is not interesting here
		if ($wordCount <= 1) {
			continue;
		}

		$metaphoneLink = $this->linkMetaPhoneDetails($this->indexerObj->metaphone($wordList[0],1),$metaphoneHash);
		$joinedWords = $this->utf8_to_currentCharset(implode(', ',$wordList));

		$tableRows.= '
<tr class="bgColor4">
<td>'.$metaphoneLink.'</td>
<td>'.htmlspecialchars($metaphoneHash).'</td>
<td>'.htmlspecialchars($wordCount).'</td>
<td style="white-space: normal;">'.htmlspecialchars($joinedWords).'</td>
</tr>
';
	}

	$headline = '<h4>'.htmlspecialchars($header).'</h4>';
	$table = '<table border="0" cellspacing="1" cellpadding="2" class="c-list">
'.$tableRows.'
</table>';

	return $headline.$table;
}
00819
00820
00821
00822
00823
00824
00825
00826
/**
 * Wraps a string in a link to the detail view of a word.
 *
 * @param string $string Link content (inserted as is)
 * @param integer $wid Word id put into the "wid" parameter
 * @return string HTML link
 */
function linkWordDetails($string,$wid) {
	// "phash" is blanked so the word view is selected instead of the phash view
	$urlParameters = array(
		'wid' => $wid,
		'phash' => '',
	);
	$href = htmlspecialchars(t3lib_div::linkThisScript($urlParameters));

	return '<a href="' . $href . '">' . $string . '</a>';
}
00830
00831
00832
00833
00834
00835
00836
00837
00838
/**
 * Wraps a string in a link to the detail view of a metaphone value.
 *
 * @param string $string Link content (inserted as is)
 * @param integer $metaphone Metaphone value put into the "metaphone" parameter
 * @return string HTML link
 */
function linkMetaPhoneDetails($string,$metaphone) {
	// "wid" and "phash" are blanked so the metaphone view is selected
	$urlParameters = array(
		'metaphone' => $metaphone,
		'wid' => '',
		'phash' => '',
	);
	$href = htmlspecialchars(t3lib_div::linkThisScript($urlParameters));

	return '<a href="' . $href . '">' . $string . '</a>';
}
00842
00843
00844
00845
00846
00847
00848
/**
 * Translates the flags bit field of a word relation into a readable message.
 *
 * Bit 128 is reported as "<title>", bit 64 as "<meta/keywords>" and bit 32 as
 * "<meta/description>", followed by the numeric value in parentheses. The
 * result is not HTML-escaped.
 *
 * @param integer $flags Flags value
 * @return string Message, or an empty string if no flags are set
 */
function flagsMsg($flags) {
	// Always return a string, so the result can be passed on to string functions
	if ($flags <= 0) {
		return '';
	}

	return
		($flags & 128 ? '<title>' : '').
		($flags & 64 ? '<meta/keywords>' : '').
		($flags & 32 ? '<meta/description>' : '').
		' ('.$flags.')';
}
00858
00859
00860
00861
00862
00863
00864
00865
00866
00867
00868
00869
00870
00871
00872
00873
00874
00875
00876
00877
00878
00879
/**
 * Renders the detail view for a word: a table of the indexed documents in
 * which the word occurs, ordered by frequency.
 *
 * @param integer $wid Word id
 * @return string HTML content
 */
function showDetailsForWord($wid) {

	// Select references to this word
	$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
		'index_phash.*, index_section.*, index_rel.*',
		'index_rel, index_section, index_phash',
		'index_rel.wid = '.intval($wid).
			' AND index_rel.phash = index_section.phash'.
			' AND index_section.phash = index_phash.phash',
		'',
		'index_rel.freq DESC',
		''
	);

	// Headers (this also initializes the content string):
	$content = '
<tr class="tableheader bgColor5">
<td>phash</td>
<td>page_id</td>
<td>data_filename</td>
<td>count</td>
<td>first</td>
<td>freq</td>
<td>flags</td>
</tr>';

	if (is_array($ftrows)) {
		foreach($ftrows as $wDat) {
			$content.='
<tr class="bgColor4">
<td>'.$this->linkDetails(htmlspecialchars($wDat['phash']),$wDat['phash']).'</td>
<td>'.htmlspecialchars($wDat['page_id']).'</td>
<td>'.htmlspecialchars($wDat['data_filename']).'</td>
<td>'.htmlspecialchars($wDat['count']).'</td>
<td>'.htmlspecialchars($wDat['first']).'</td>
<td>'.htmlspecialchars($wDat['freq']).'</td>
<td>'.htmlspecialchars($wDat['flags']).'</td>
</tr>';
		}
	}

	// Compile table:
	$content = '
<table border="0" cellspacing="1" cellpadding="2" class="c-list">'.
		$content.'
</table>';

	// Add go-back link:
	$content = $content.$this->linkList();

	return $content;
}
00932
00933
00934
00935
00936
00937
00938
/**
 * Renders the detail view for a metaphone value: a table of all words that
 * share the given metaphone hash.
 *
 * @param integer $metaphone Metaphone hash value
 * @return string HTML content; an empty string if no word has this hash
 */
function showDetailsForMetaphone($metaphone) {
	$content = '';

	// Finding all words with this metaphone hash:
	$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
		'index_words.*',
		'index_words',
		'index_words.metaphone = '.intval($metaphone),
		'',
		'index_words.baseword',
		''
	);

	if (is_array($ftrows) && count($ftrows)) {
		// Table rows, starting with the header row:
		$tableRows = '
<tr class="tableheader bgColor5">
<td>Word</td>
<td>Is stopword?</td>
</tr>';

		foreach($ftrows as $wDat) {
			$tableRows.='
<tr class="bgColor4">
<td>'.$this->linkWordDetails(htmlspecialchars($wDat['baseword']),$wDat['wid']).'</td>
<td>'.htmlspecialchars($wDat['is_stopword'] ? 'YES' : 'No').'</td>
</tr>';
		}

		// The headline goes in front of the table; placed inside <table> it would be invalid markup
		$content = '<h4>Metaphone: '.$this->indexerObj->metaphone($ftrows[0]['baseword'],1).'</h4>'.
			'
<table border="0" cellspacing="1" cellpadding="2" class="c-list">'.
			$tableRows.'
</table>';

		// Cross-check the requested hash against the one calculated from the first word
		if ($this->indexerObj->metaphone($ftrows[0]['baseword'])!=$metaphone) {
			$content.='ERROR: Metaphone string and hash did not match for some reason!?';
		}

		// Add go-back link:
		$content = $content.$this->linkList();
	}

	return $content;
}
00985
00986
00987
00988
00989
00990
00991
00992
00993
00994
00995
00996
00997
00998
00999
01000
01001
01002
01003
01004
01005
01006
01007
01008
01009
/**
 * Creates the icon link for clearing index rows.
 *
 * @param mixed $phash Value for the "deletePhash" parameter (a phash value, or "ALL")
 * @param string $alt Title text of the link
 * @return string HTML link with a delete icon
 */
function printRemoveIndexed($phash,$alt) {
	$deleteUrl = t3lib_div::linkThisScript(array('deletePhash' => $phash));
	$deleteIcon = t3lib_iconWorks::getSpriteIcon('actions-edit-delete');

	$openingTag = '<a href="' . htmlspecialchars($deleteUrl) . '" title="' . htmlspecialchars($alt) . '">';

	return $openingTag . $deleteIcon . '</a>';
}
01015
01016
01017
01018
01019
01020
01021
01022
/**
 * Creates the icon link for re-indexing an element. Only rows with a
 * non-empty item type other than "0" get a link.
 *
 * @param array $resultRow Index row; "item_type", "phash" and "page_id" are used
 * @param string $alt Title text of the icon
 * @return string|NULL HTML link with a refresh icon; nothing for other rows
 */
function printReindex($resultRow,$alt) {
	// No re-index link for rows without item type or with item type "0"
	if (!$resultRow['item_type'] || $resultRow['item_type'] === '0') {
		return;
	}

	$reindexUrl = t3lib_div::linkThisScript(array(
		'reindex' => $resultRow['phash'],
		'reindex_id' => $resultRow['page_id'],
	));
	$iconTag = '<img ' . t3lib_iconWorks::skinImg($GLOBALS['BACK_PATH'], 'gfx/refresh_n.gif', 'width="14" height="14"') . ' hspace="1" vspace="2" border="0" title="' . htmlspecialchars($alt) . '" alt="" />';

	return '<a href="' . htmlspecialchars($reindexUrl) . '">' . $iconTag . '</a>';
}
01030
01031
01032
01033
01034
01035
01036
01037
/**
 * Wraps a string in a link to the detail view of a phash row.
 *
 * @param string $string Link content (inserted as is)
 * @param integer $phash phash value put into the "phash" parameter
 * @return string HTML link
 */
function linkDetails($string,$phash) {
	$detailUrl = t3lib_div::linkThisScript(array('phash' => $phash));

	return '<a href="' . htmlspecialchars($detailUrl) . '">' . $string . '</a>';
}
01041
01042
01043
01044
01045
01046
/**
 * Creates the "Back to list." link pointing to the current page in this module.
 *
 * @return string HTML link surrounded by line breaks
 */
function linkList() {
	$listUrl = 'index.php?id=' . $this->pObj->id;

	return '<br /><a href="' . $listUrl . '">Back to list.</a><br />';
}
01050
01051
01052
01053
01054
01055
01056
01057
/**
 * Wraps a string in a link that shows the technical details (type 1, depth 0)
 * for a page.
 *
 * @param string $string Link content (inserted as is)
 * @param integer $id Page id
 * @return string HTML link
 */
function showPageDetails($string,$id) {
	$detailUrl = 'index.php?id=' . $id . '&SET[depth]=0&SET[type]=1';

	return '<a href="' . htmlspecialchars($detailUrl) . '">' . $string . '</a>';
}
01061
01062
01063
01064
01065
01066
01067
/**
 * Renders the gr_list values of additional rows, one per line, wrapped by the
 * dfw() method of the backend template object.
 *
 * @param array $extraGrListRows Rows with a "gr_list" field
 * @return string HTML content starting with a line break, or an empty string if there are no rows
 */
function printExtraGrListRows($extraGrListRows) {
	// Anything but a non-empty array means there is nothing to show
	if (!is_array($extraGrListRows) || !count($extraGrListRows)) {
		return '';
	}

	$lines = array();
	foreach ($extraGrListRows as $r) {
		$lines[] = $r['gr_list'];
	}

	return '<br />' . $GLOBALS['TBE_TEMPLATE']->dfw(implode('<br />', $lines));
}
01077
01078
01079
01080
01081
01082
01083
/**
 * Builds the "/"-separated rootline string of an index row from its rl0, rl1
 * and rl2 fields, stopping at the first empty level. If all three levels are
 * set, the fields configured in "addRootLineFields" are added at their
 * configured levels.
 *
 * @param array $row Index row with rl0 / rl1 / rl2 (and possibly additional rootline fields)
 * @return string Rootline values joined by "/", ordered by level
 */
function printRootlineInfo($row) {
	$uidCollection = array();

	// Take the standard levels in order; the first empty one ends the chain
	foreach (array('rl0', 'rl1', 'rl2') as $level => $fieldName) {
		if (!$row[$fieldName]) {
			break;
		}
		$uidCollection[$level] = $row[$fieldName];
	}

	// Additional levels are only considered when all three standard levels are present
	if (count($uidCollection) === 3) {
		$additionalFields = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'];
		if (is_array($additionalFields)) {
			foreach ($additionalFields as $fieldName => $rootLineLevel) {
				if ($row[$fieldName]) {
					$uidCollection[$rootLineLevel] = $row[$fieldName];
				}
			}
		}
	}

	// Order by level before joining
	ksort($uidCollection);
	return implode('/', $uidCollection);
}
01110
01111
01112
01113
01114
01115
01116
01117
/**
 * Creates the icon <img> tag for an item type. The tag is cached per item
 * type; only the title attribute is filled in for each call.
 *
 * @param string $it Item type ("0" for pages, otherwise a key of the external parsers)
 * @param string $alt Text for the title attribute (prefixed with the item type)
 * @return string <img> tag, or an empty string if no icon is available
 */
function makeItemTypeIcon($it,$alt='') {
	if (!isset($this->iconFileNameCache[$it])) {
		// Find the icon file reference for the item type:
		$icon = '';
		if ($it==='0') {
			$icon = 'EXT:indexed_search/pi/res/pages.gif';
		} elseif (isset($this->external_parsers[$it]) && $this->external_parsers[$it]) {
			$icon = $this->external_parsers[$it]->getIcon($it);
		}

		$imgTag = '';
		$fullPath = $icon ? t3lib_div::getFileAbsFileName($icon) : '';

		if ($fullPath) {
			// Unreadable or non-image files simply result in "no icon"
			$info = @getimagesize($fullPath);
			if (is_array($info)) {
				$iconPath = $GLOBALS['BACK_PATH'].'../'.substr($fullPath,strlen(PATH_site));
				$imgTag = '<img src="'.$iconPath.'" '.$info[3].' title="###TITLE_ATTRIBUTE###" alt="" />';
			}
		}

		// Cache the result in every case, so a missing icon is not looked up again
		// and the cache entry always exists for the return statement below
		$this->iconFileNameCache[$it] = $imgTag;
	}

	return str_replace('###TITLE_ATTRIBUTE###',htmlspecialchars($it.': '.$alt),$this->iconFileNameCache[$it]);
}
01136
01137
01138
01139
01140
01141
01142
/**
 * Converts a UTF-8 string to the charset of the language object. Nothing is
 * done if that charset is "utf-8".
 *
 * @param string $string UTF-8 string
 * @return string The string in the current charset
 */
function utf8_to_currentCharset($string) {
	$languageService = $GLOBALS['LANG'];
	$targetCharset = $languageService->charSet;

	// Already in the right charset
	if ($targetCharset == 'utf-8') {
		return $string;
	}

	return $languageService->csConvObj->utf8_decode($string, $targetCharset, TRUE);
}
01150
01151
01152
01153
01154
01155
01156
01157
01158
01159
01160
01161
01162
01163
01164
01165
01166
01167
01168
01169
01170
01171
01172
01173
01174
/**
 * Re-indexes the document of a phash row (only rows with a non-empty item
 * type other than "0") and returns the indexer log.
 *
 * @param integer $phash phash value of the row
 * @param integer $pageId Page id of the section record the row is related to
 * @return string HTML content; only the go-back link if nothing was re-indexed
 */
function reindexPhash($phash, $pageId) {

	// Query:
	$rows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
		'ISEC.*, IP.*',
		'index_phash IP, index_section ISEC',
		'IP.phash = ISEC.phash
AND IP.phash = '.intval($phash).'
AND ISEC.page_id = '.intval($pageId)
	);
	// The result may be empty (or no array at all), so the first row is picked with a check
	$resultRow = (is_array($rows) && isset($rows[0])) ? $rows[0] : NULL;

	$content = '';
	if (is_array($resultRow)) {
		if ($resultRow['item_type'] && $resultRow['item_type']!=='0') {

			// (Re)-Indexing file on page.
			$indexerObj = t3lib_div::makeInstance('tx_indexedsearch_indexer');
			$indexerObj->backend_initIndexer($pageId, 0, 0, '', $this->getUidRootLineForClosestTemplate($pageId));

			// URL or local file:
			if ($resultRow['externalUrl']) {
				$indexerObj->indexExternalUrl($resultRow['data_filename']);
			} else {
				$indexerObj->indexRegularDocument($resultRow['data_filename'], TRUE);
			}

			if ($indexerObj->file_phash_arr['phash'] != $resultRow['phash']) {
				$content.= 'ERROR: phash ('.$indexerObj->file_phash_arr['phash'].') did NOT match '.$resultRow['phash'].' for strange reasons!';
			}

			$content.='<h4>Log for re-indexing of "'.htmlspecialchars($resultRow['data_filename']).'":</h4>';
			$content.=t3lib_div::view_array($indexerObj->internal_log);

			$content.='<h4>Hash-array, page:</h4>';
			$content.=t3lib_div::view_array($indexerObj->hash);

			$content.='<h4>Hash-array, file:</h4>';
			$content.=t3lib_div::view_array($indexerObj->file_phash_arr);
		}
	}

	// Link back to list.
	$content.= $this->linkList();

	return $content;
}
01221
01222
01223
01224
01225
01226
01227
01228
/**
 * Builds the list of page uids in the rootline that the TypoScript template
 * parser holds after running through the templates for a page.
 *
 * NOTE(review): judging by the method name, $tmpl->rootLine presumably starts
 * at the closest root template - confirm against t3lib_tsparser_ext.
 *
 * @param	integer		Page id whose rootline is evaluated
 * @return	array		Page uids, keyed exactly like the entries of $tmpl->rootLine
 */
function getUidRootLineForClosestTemplate($id) {
    // Template parser, with time tracking switched off.
    $templateParser = t3lib_div::makeInstance('t3lib_tsparser_ext');
    $templateParser->tt_track = 0;
    $templateParser->init();

    // Full rootline of the page, handed to the parser so it can locate templates.
    $pageSelect = t3lib_div::makeInstance('t3lib_pageSelect');
    $templateParser->runThroughTemplates($pageSelect->getRootLine($id),0);

    // Reduce every rootline record to its uid, preserving the keys.
    $uidsByLevel = array();
    foreach ($templateParser->rootLine as $level => $pageRecord) {
        $uidsByLevel[$level] = $pageRecord['uid'];
    }

    return $uidsByLevel;
}
01247
01248
01249
01250
01251
01252
01253
01254
01255
01256
01257
01258
01259
01260
01261
01262
01263
01264
01265
01266
01267
01268
01269
01270
01271
/**
 * Removes indexed entries (by phash) from all index tables and, optionally,
 * clears the page cache of the pages those entries belong to.
 *
 * @param	string		Comma list of phash integers, or the exact string "ALL" to remove every phash collected in $this->allPhashListed
 * @param	boolean		If set, cached versions of the pages referenced in index_section for each phash are flushed
 * @return	void
 */
function removeIndexedPhashRow($phashList,$clearPageCache=1) {

    // Strict comparison on purpose: with "==" an integer 0 or TRUE equals
    // 'ALL' on PHP < 8 and would select the delete-everything branch.
    if ($phashList === 'ALL') {
        // Rendering the table is relied on here to populate $this->allPhashListed.
        $this->drawTableOfIndexedPages();
        $phashRows = $this->allPhashListed;
        $this->allPhashListed = array();
    } else {
        $phashRows = t3lib_div::trimExplode(',',$phashList,1);
    }

    // Tables holding rows keyed by phash (loop-invariant, so built once).
    $tableArr = array('index_phash','index_rel','index_section','index_grlist','index_fulltext','index_debug');

    foreach($phashRows as $phash) {
        $phash = intval($phash);
        if ($phash <= 0) {
            continue;
        }

        if ($clearPageCache) {
            // Clearing page cache of all pages this phash is registered on:
            $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('page_id', 'index_section', 'phash='.$phash);
            if ($GLOBALS['TYPO3_DB']->sql_num_rows($res)) {
                $idList = array();
                while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
                    $idList[] = (int)$row['page_id'];
                }

                if (TYPO3_UseCachingFramework) {
                    $pageCache = $GLOBALS['typo3CacheManager']->getCache('cache_pages');
                    foreach ($idList as $pageId) {
                        $pageCache->flushByTag('pageId_' . $pageId);
                    }
                } else {
                    $GLOBALS['TYPO3_DB']->exec_DELETEquery('cache_pages', 'page_id IN (' . implode(',', $idList) . ')');
                }
            }
        }

        // Removing the indexed data itself:
        foreach($tableArr as $table) {
            $GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash='.$phash);
        }
    }
}
01316
01317
01318
01319
01320
01321
01322
01323
/**
 * Fetches the index_grlist rows of a phash, leaving out the first row whose
 * gr_list equals the given one. Further rows with the same gr_list are kept.
 *
 * @param	integer		phash to select index_grlist rows for
 * @param	string		gr_list value of the one row to leave out
 * @return	array		Remaining index_grlist rows, in result order
 */
function getGrListEntriesForPhash($phash,$gr_list) {
    $db = $GLOBALS['TYPO3_DB'];
    $result = $db->exec_SELECTquery('*', 'index_grlist', 'phash='.intval($phash));

    $remaining = array();
    $skippedOne = FALSE;
    while ($record = $db->sql_fetch_assoc($result)) {
        // Keep the row unless it is the first one matching $gr_list.
        if ($skippedOne || strcmp($record['gr_list'],$gr_list)) {
            $remaining[] = $record;
            continue;
        }
        $skippedOne = TRUE;
    }

    return $remaining;
}
01337
01338
01339
01340
01341
01342
01343
/**
 * Stores the submitted stop-word flags in index_words. Only executed for
 * admin backend users.
 *
 * The array keys come straight from the request, so each key is validated as
 * an integer before it is put into the WHERE clause; keys that are not plain
 * integers are skipped. The flag is stored as an integer.
 *
 * @param	array		Word id (wid) => stop-word state; anything that is not an array is ignored
 * @return	void
 */
function processStopWords($stopWords) {

    if (!is_array($stopWords) || !$GLOBALS['BE_USER']->isAdmin()) {
        return;
    }

    // Traverse words
    foreach($stopWords as $wid => $state) {
        // Reject keys such as "7 OR 1=1" instead of concatenating them into SQL.
        if ((string)intval($wid) !== (string)$wid) {
            continue;
        }

        $fieldArray = array(
            'is_stopword' => intval($state)
        );
        $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_words', 'wid='.intval($wid), $fieldArray);
    }
}
01356
01357
01358
01359
01360
01361
01362
01363
/**
 * Adds keywords to, or removes keywords from, the "keywords" field of a page
 * and saves the result through TCEmain.
 *
 * Nothing is written when $pageKeywords is not an array or when the page
 * record cannot be fetched.
 *
 * @param	array		Keyword => state; a true state adds the keyword, a false state removes it
 * @param	integer		Uid of the page whose keywords are edited
 * @return	void
 */
function processPageKeywords($pageKeywords, $pageUid) {

    // The uid becomes a datamap key below; cast it so a malformed request
    // value cannot end up there as a non-integer record id.
    $pageUid = intval($pageUid);
    if (!is_array($pageKeywords)) {
        return;
    }

    // Get pages current keywords
    $pageRec = t3lib_BEfunc::getRecord('pages', $pageUid);
    if (!is_array($pageRec)) {
        return;
    }
    $keywords = array_flip(t3lib_div::trimExplode(',', $pageRec['keywords'], 1));

    // Merge keywords: keywords are array keys here, so adding is idempotent.
    foreach($pageKeywords as $key => $v) {
        if ($v) {
            $keywords[$key]=1;
        } else {
            unset($keywords[$key]);
        }
    }

    // Compile new list:
    $data = array();
    $data['pages'][$pageUid]['keywords'] = implode(', ',array_keys($keywords));

    $tce = t3lib_div::makeInstance('t3lib_TCEmain');
    $tce->stripslashes_values = 0;
    $tce->start($data,array());
    $tce->process_datamap();
}
01388 }
01389
01390
01391
// Include an extending class (XCLASS) if one is registered for this file.
// $GLOBALS is used because this file may be included from inside a function,
// where a bare $TYPO3_CONF_VARS would be an undefined local variable; !empty()
// also avoids a notice when no XCLASS is registered.
if (defined('TYPO3_MODE') && !empty($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['ext/indexed_search/modfunc1/class.tx_indexedsearch_modfunc1.php'])) {
    include_once($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['ext/indexed_search/modfunc1/class.tx_indexedsearch_modfunc1.php']);
}
01395
01396 ?>