|
TYPO3 API
SVNRelease
|
Public Member Functions | |
| hook_indexContent (&$pObj) | |
| backend_initIndexer ($id, $type, $sys_language_uid, $MP, $uidRL, $cHash_array=array(), $createCHash=FALSE) | |
| backend_setFreeIndexUid ($freeIndexUid, $freeIndexSetId=0) | |
| backend_indexAsTYPO3Page ($title, $keywords, $description, $content, $charset, $mtime, $crdate=0, $recordUid=0) | |
| init () | |
| initializeExternalParsers () | |
| indexTypo3PageContent () | |
| splitHTMLContent ($content) | |
| getHTMLcharset ($content) | |
| convertHTMLToUtf8 ($content, $charset='') | |
| embracingTags ($string, $tagName, &$tagContent, &$stringAfter, &$paramList) | |
| typoSearchTags (&$body) | |
| extractLinks ($content) | |
| extractHyperLinks ($html) | |
| extractBaseHref ($html) | |
| indexExternalUrl ($externalUrl) | |
| getUrlHeaders ($url) | |
| indexRegularDocument ($file, $force=FALSE, $contentTmpFile='', $altExtension='') | |
| readFileContent ($ext, $absFile, $cPKey) | |
| fileContentParts ($ext, $absFile) | |
| splitRegularContent ($content) | |
| charsetEntity2utf8 (&$contentArr, $charset) | |
| processWordsInArrays ($contentArr) | |
| bodyDescription ($contentArr) | |
| indexAnalyze ($content) | |
| analyzeHeaderinfo (&$retArr, $content, $key, $offset) | |
| analyzeBody (&$retArr, $content) | |
| metaphone ($word, $retRaw=FALSE) | |
| submitPage () | |
| submit_grlist ($hash, $phash_x) | |
| submit_section ($hash, $hash_t3) | |
| removeOldIndexedPages ($phash) | |
| submitFilePage ($hash, $file, $subinfo, $ext, $mtime, $ctime, $size, $content_md5h, $contentParts) | |
| submitFile_grlist ($hash) | |
| submitFile_section ($hash) | |
| removeOldIndexedFiles ($phash) | |
| checkMtimeTstamp ($mtime, $phash) | |
| checkContentHash () | |
| checkExternalDocContentHash ($hashGr, $content_md5h) | |
| is_grlist_set ($phash_x) | |
| update_grlist ($phash, $phash_x) | |
| updateTstamp ($phash, $mtime=0) | |
| updateSetId ($phash) | |
| updateParsetime ($phash, $parsetime) | |
| updateRootline () | |
| getRootLineFields (&$fieldArr) | |
| removeLoginpagesWithContentHash () | |
| includeCrawlerClass () | |
| checkWordList ($wl) | |
| submitWords ($wl, $phash) | |
| freqMap ($freq) | |
| setT3Hashes () | |
| setExtHashes ($file, $subinfo=array()) | |
| md5inthash ($str) | |
| log_push ($msg, $key) | |
| log_pull () | |
| log_setTSlogMessage ($msg, $errorNum=0) | |
Public Attributes | |
| $reasons | |
| $excludeSections = 'script,style' | |
| $external_parsers = array() | |
| $defaultGrList = '0,-1' | |
| $tstamp_maxAge = 0 | |
| $tstamp_minAge = 0 | |
| $maxExternalFiles = 0 | |
| $forceIndexing = FALSE | |
| $crawlerActive = FALSE | |
| $defaultContentArray | |
| $wordcount = 0 | |
| $externalFileCounter = 0 | |
| $conf = array() | |
| $indexerConfig = array() | |
| $hash = array() | |
| $file_phash_arr = array() | |
| $contentParts = array() | |
| $content_md5h = '' | |
| $internal_log = array() | |
| $indexExternalUrl_content = '' | |
| $cHashParams = array() | |
| $freqRange = 32000 | |
| $freqMax = 0.1 | |
| $csObj | |
| $metaphoneObj | |
| $lexerObj | |
Protected Member Functions | |
| createLocalPath ($sourcePath) | |
| createLocalPathFromT3vars ($sourcePath) | |
| createLocalPathUsingDomainURL ($sourcePath) | |
| createLocalPathUsingAbsRefPrefix ($sourcePath) | |
| createLocalPathFromAbsoluteURL ($sourcePath) | |
| createLocalPathFromRelativeURL ($sourcePath) | |
| addSpacesToKeywordList ($keywordList) | |
Static Protected Member Functions | |
| static | isRelativeURL ($url) |
| static | isAllowedLocalFile ($filePath) |
Definition at line 136 of file class.indexer.php.
| tx_indexedsearch_indexer::addSpacesToKeywordList | ( | $ | keywordList | ) | [protected] |
Makes sure that keywords are space-separated. This is important for their proper display as part of the fulltext index.
| string | $keywordList |
Definition at line 2196 of file class.indexer.php.
References t3lib_div\trimExplode().
Referenced by splitHTMLContent().
| tx_indexedsearch_indexer::analyzeBody | ( | &$ | retArr, |
| $ | content | ||
| ) |
Calculates relevant information for bodycontent
| array | Index array, passed by reference |
| array | Standard content array |
Definition at line 1409 of file class.indexer.php.
References $content, and metaphone().
Referenced by indexAnalyze().
| tx_indexedsearch_indexer::analyzeHeaderinfo | ( | &$ | retArr, |
| $ | content, | ||
| $ | key, | ||
| $ | offset | ||
| ) |
Calculates relevant information for headercontent
| array | Index array, passed by reference |
| array | Standard content array |
| string | Key from standard content array |
| integer | Bit-wise priority to type |
Definition at line 1391 of file class.indexer.php.
References $content, and metaphone().
Referenced by indexAnalyze().
| tx_indexedsearch_indexer::backend_indexAsTYPO3Page | ( | $ | title, |
| $ | keywords, | ||
| $ | description, | ||
| $ | content, | ||
| $ | charset, | ||
| $ | mtime, | ||
| $ | crdate = 0, |
||
| $ | recordUid = 0 |
||
| ) |
Indexing records as the content of a TYPO3 page.
| string | Title equivalent |
| string | Keywords equivalent |
| string | Description equivalent |
| string | The main content to index |
| string | The charset of the title, keyword, description and body-content. MUST BE VALID, otherwise nothing is indexed! |
| integer | Last modification time, in seconds |
| integer | The creation date of the content, in seconds |
| integer | The record UID that the content comes from (for registration with the indexed rows) |
Definition at line 379 of file class.indexer.php.
References $content, and indexTypo3PageContent().
| tx_indexedsearch_indexer::backend_initIndexer | ( | $ | id, |
| $ | type, | ||
| $ | sys_language_uid, | ||
| $ | MP, | ||
| $ | uidRL, | ||
| $ | cHash_array = array(), |
||
| $ | createCHash = FALSE |
||
| ) |
Initializing the "combined ID" of the page (phash) being indexed (or for which external media is attached)
| integer | The page uid, &id= |
| integer | The page type, &type= |
| integer | sys_language uid, typically &L= |
| string | The MP variable (Mount Points), &MP= |
| array | Rootline array of only UIDs. |
| array | Array of GET variables to register with this indexing |
| boolean | If set, calculates a cHash value from the $cHash_array. Probably you will not do that since such cases are indexed through the frontend and the idea of this interface is to index non-cachable pages from the backend! |
Definition at line 321 of file class.indexer.php.
References t3lib_div\generateCHash(), t3lib_div\implodeArrayForUrl(), and init().
| tx_indexedsearch_indexer::backend_setFreeIndexUid | ( | $ | freeIndexUid, |
| $ | freeIndexSetId = 0 |
||
| ) |
Sets the free-index uid. Can be called right after backend_initIndexer()
| integer | Free index UID |
| integer | Set id - an integer identifying the "set" of indexing operations. |
Definition at line 361 of file class.indexer.php.
| tx_indexedsearch_indexer::bodyDescription | ( | $ | contentArr | ) |
Extracts the sample description text from the content array.
| array | Content array |
Definition at line 1348 of file class.indexer.php.
References t3lib_div\intInRange().
Referenced by submitFilePage(), and submitPage().
| tx_indexedsearch_indexer::charsetEntity2utf8 | ( | &$ | contentArr, |
| $ | charset | ||
| ) |
Convert character set and HTML entities in the value of input content array keys
| array | Standard content array |
| string | Charset of the input content (converted to utf-8) |
Definition at line 1304 of file class.indexer.php.
References $value.
Referenced by indexTypo3PageContent().
| tx_indexedsearch_indexer::checkContentHash | ( | ) |
Check content hash in phash table
Definition at line 1798 of file class.indexer.php.
References $GLOBALS, $res, and $row.
Referenced by indexTypo3PageContent().
| tx_indexedsearch_indexer::checkExternalDocContentHash | ( | $ | hashGr, |
| $ | content_md5h | ||
| ) |
Check content hash for external documents. Returns true if the document needs to be indexed (that is, there was no result).
| integer | phash value to check (phash_grouping) |
| integer | Content hash to check |
Definition at line 1815 of file class.indexer.php.
References $content_md5h, $GLOBALS, $res, and $row.
Referenced by indexRegularDocument().
| tx_indexedsearch_indexer::checkMtimeTstamp | ( | $ | mtime, |
| $ | phash | ||
| ) |
Check the mtime / tstamp of the currently indexed page/file (based on phash). Returns a positive integer if the page needs to be indexed.
| integer | mtime value to test against limits and indexed page (usually this is the mtime of the cached document) |
| integer | "phash" used to select any already indexed page to see what its mtime is. |
Definition at line 1762 of file class.indexer.php.
References $GLOBALS, $res, $row, log_setTSlogMessage(), and updateTstamp().
Referenced by indexRegularDocument(), and indexTypo3PageContent().
| tx_indexedsearch_indexer::checkWordList | ( | $ | wl | ) |
Adds new words to db
| array | Word List array (where each word has information about position etc). |
Definition at line 1981 of file class.indexer.php.
References $GLOBALS, $res, $row, $value, and log_setTSlogMessage().
Referenced by indexRegularDocument(), and indexTypo3PageContent().
| tx_indexedsearch_indexer::convertHTMLToUtf8 | ( | $ | content, |
| $ | charset = '' |
||
| ) |
Converts a HTML document to utf-8
| string | HTML content, any charset |
| string | Optional charset (otherwise extracted from HTML) |
Definition at line 677 of file class.indexer.php.
References $content, and getHTMLcharset().
| tx_indexedsearch_indexer::createLocalPath | ( | $ | sourcePath | ) | [protected] |
Checks if the file is local
| $sourcePath |
Definition at line 971 of file class.indexer.php.
Referenced by extractHyperLinks().
| tx_indexedsearch_indexer::createLocalPathFromAbsoluteURL | ( | $ | sourcePath | ) | [protected] |
Attempts to create a local file path from the absolute URL without schema.
| string | $sourcePath |
Definition at line 1062 of file class.indexer.php.
| tx_indexedsearch_indexer::createLocalPathFromRelativeURL | ( | $ | sourcePath | ) | [protected] |
Attempts to create a local file path from the relative URL.
| string | $sourcePath |
Definition at line 1080 of file class.indexer.php.
| tx_indexedsearch_indexer::createLocalPathFromT3vars | ( | $ | sourcePath | ) | [protected] |
Attempts to create a local file path from T3VARs. This is useful for various download extensions that hide actual file name but still want the file to be indexed.
| string | $sourcePath |
Definition at line 997 of file class.indexer.php.
References $GLOBALS, and t3lib_div\shortMD5().
| tx_indexedsearch_indexer::createLocalPathUsingAbsRefPrefix | ( | $ | sourcePath | ) | [protected] |
Attempts to create a local file path by matching absRefPrefix. This requires TSFE. If TSFE is missing, this function does nothing.
| string | $sourcePath |
Definition at line 1039 of file class.indexer.php.
References $GLOBALS.
| tx_indexedsearch_indexer::createLocalPathUsingDomainURL | ( | $ | sourcePath | ) | [protected] |
Attempts to create a local file path by matching a current request URL.
| string | $sourcePath |
Definition at line 1018 of file class.indexer.php.
References t3lib_div\getIndpEnv().
| tx_indexedsearch_indexer::embracingTags | ( | $ | string, |
| $ | tagName, | ||
| &$ | tagContent, | ||
| &$ | stringAfter, | ||
| &$ | paramList | ||
| ) |
Finds the first occurrence of embracing tags and returns the embraced content and the original string with the tag removed in the two passed variables. Returns false if no match is found. Useful, for instance, for finding the <title> of a document or removing <script>-sections.
| string | String to search in |
| string | Tag name, eg. "script" |
| string | Passed by reference: Content inside found tag |
| string | Passed by reference: Content after found tag |
| string | Passed by reference: Attributes of the found tag. |
Definition at line 705 of file class.indexer.php.
Referenced by splitHTMLContent().
| tx_indexedsearch_indexer::extractBaseHref | ( | $ | html | ) |
Extracts the "base href" from content string.
| string | Content to analyze |
Definition at line 878 of file class.indexer.php.
References t3lib_div\makeInstance().
| tx_indexedsearch_indexer::extractHyperLinks | ( | $ | html | ) |
Extracts all links to external documents from the HTML content string
| string | $html |
Definition at line 848 of file class.indexer.php.
References createLocalPath(), and t3lib_div\makeInstance().
Referenced by extractLinks().
| tx_indexedsearch_indexer::extractLinks | ( | $ | content | ) |
Extract links (hrefs) from HTML content and if indexable media is found, it is indexed.
| string | HTML content |
Definition at line 761 of file class.indexer.php.
References $content, extractHyperLinks(), t3lib_div\getFileAbsFileName(), t3lib_div\htmlspecialchars_decode(), includeCrawlerClass(), indexExternalUrl(), indexRegularDocument(), t3lib_div\isAllowedAbsPath(), t3lib_extMgm\isLoaded(), log_setTSlogMessage(), and t3lib_div\makeInstance().
Referenced by indexTypo3PageContent().
| tx_indexedsearch_indexer::fileContentParts | ( | $ | ext, |
| $ | absFile | ||
| ) |
Creates an array with pointers to divisions of document.
| string | File extension |
| string | Absolute filename (must exist and be validated OK before calling function) |
Definition at line 1253 of file class.indexer.php.
Referenced by indexRegularDocument().
| tx_indexedsearch_indexer::freqMap | ( | $ | freq | ) |
Maps frequency from a real number in [0;1] to an integer in [0;$this->freqRange] with anything above $this->freqMax as 1 and back.
| double | Frequency |
Definition at line 2040 of file class.indexer.php.
Referenced by submitWords().
| tx_indexedsearch_indexer::getHTMLcharset | ( | $ | content | ) |
Extract the charset value from HTML meta tag.
| string | HTML content |
Definition at line 662 of file class.indexer.php.
References $content.
Referenced by convertHTMLToUtf8().
| tx_indexedsearch_indexer::getRootLineFields | ( | &$ | fieldArr | ) |
Adding values for root-line fields. rl0, rl1 and rl2 are standard. A hook might add more.
| array | Field array, passed by reference |
Definition at line 1918 of file class.indexer.php.
References $GLOBALS.
Referenced by submit_section(), and updateRootline().
| tx_indexedsearch_indexer::getUrlHeaders | ( | $ | url | ) |
Getting HTTP request headers of URL
| string | The URL |
| integer | Timeout (seconds?) |
Definition at line 944 of file class.indexer.php.
References $content, t3lib_div\getURL(), and t3lib_div\trimExplode().
Referenced by indexExternalUrl().
| tx_indexedsearch_indexer::hook_indexContent | ( | &$ | pObj | ) |
Parent Object (TSFE) Initialization
| object | Parent Object (frontend TSFE object), passed by reference |
Definition at line 219 of file class.indexer.php.
References $GLOBALS, $indexerConfig, indexTypo3PageContent(), init(), t3lib_extMgm\isLoaded(), log_pull(), log_push(), and log_setTSlogMessage().
| tx_indexedsearch_indexer::includeCrawlerClass | ( | ) |
Includes the crawler class
Definition at line 1954 of file class.indexer.php.
References $TYPO3_CONF_VARS, and t3lib_extMgm\extPath().
Referenced by extractLinks().
| tx_indexedsearch_indexer::indexAnalyze | ( | $ | content | ) |
Analyzes content to use for indexing.
| array | Standard content array: an array with the keys title,keywords,description and body, which all contain an array of words. |
Definition at line 1370 of file class.indexer.php.
References $content, analyzeBody(), and analyzeHeaderinfo().
Referenced by indexRegularDocument(), and indexTypo3PageContent().
| tx_indexedsearch_indexer::indexExternalUrl | ( | $ | externalUrl | ) |
Indexes the HTML content of an external URL.
| string | URL, eg. "http://typo3.org/" |
Definition at line 911 of file class.indexer.php.
References $content, getUrlHeaders(), indexRegularDocument(), t3lib_div\tempnam(), and t3lib_div\writeFile().
Referenced by extractLinks().
| tx_indexedsearch_indexer::indexRegularDocument | ( | $ | file, |
| $ | force = FALSE, |
||
| $ | contentTmpFile = '', |
||
| $ | altExtension = '' |
||
| ) |
Indexing a regular document given as $file (relative to PATH_site, local file)
| string | Relative Filename, relative to PATH_site. It can also be an absolute path as long as it is inside the lockRootPath (validated with t3lib_div::isAbsPath()). Finally, if $contentTmpFile is set, this value can be anything, most likely a URL |
| boolean | If set, indexing is forced (despite content hashes, mtime etc). |
| string | Temporary file with the content to read it from (instead of $file). Used when the $file is a URL. |
| string | File extension for temporary file. |
Definition at line 1130 of file class.indexer.php.
References $content_md5h, $contentParts, checkExternalDocContentHash(), checkMtimeTstamp(), checkWordList(), fileContentParts(), t3lib_div\getFileAbsFileName(), indexAnalyze(), t3lib_div\isAbsPath(), t3lib_div\isAllowedAbsPath(), log_pull(), log_push(), log_setTSlogMessage(), md5inthash(), t3lib_div\milliseconds(), processWordsInArrays(), readFileContent(), setExtHashes(), submitFile_section(), submitFilePage(), submitWords(), updateParsetime(), and updateTstamp().
Referenced by extractLinks(), and indexExternalUrl().
| tx_indexedsearch_indexer::indexTypo3PageContent | ( | ) |
Start indexing of the TYPO3 page
Definition at line 523 of file class.indexer.php.
References charsetEntity2utf8(), checkContentHash(), checkMtimeTstamp(), checkWordList(), extractLinks(), indexAnalyze(), is_grlist_set(), log_pull(), log_push(), log_setTSlogMessage(), md5inthash(), t3lib_div\milliseconds(), processWordsInArrays(), splitHTMLContent(), submitPage(), submitWords(), update_grlist(), updateParsetime(), updateRootline(), updateSetId(), and updateTstamp().
Referenced by backend_indexAsTYPO3Page(), and hook_indexContent().
| tx_indexedsearch_indexer::init | ( | ) |
Initializes the object. $this->conf MUST be set with proper values prior to this call!!!
Definition at line 430 of file class.indexer.php.
References $GLOBALS, $TYPO3_CONF_VARS, t3lib_div\getUserObj(), initializeExternalParsers(), t3lib_div\intInRange(), t3lib_div\makeInstance(), and setT3Hashes().
Referenced by backend_initIndexer(), and hook_indexContent().
| tx_indexedsearch_indexer::initializeExternalParsers | ( | ) |
Initialize external parsers
Definition at line 482 of file class.indexer.php.
References $TYPO3_CONF_VARS, and t3lib_div\getUserObj().
Referenced by init().
| tx_indexedsearch_indexer::is_grlist_set | ( | $ | phash_x | ) |
Checks if a grlist record has been set for the phash value input (looking at the "real" phash of the current content, not the linked-to phash of the common search result page)
| integer | Phash integer to test. |
Definition at line 1829 of file class.indexer.php.
References $GLOBALS.
Referenced by indexTypo3PageContent().
| static tx_indexedsearch_indexer::isAllowedLocalFile | ( | $ | filePath | ) | [static, protected] |
Checks if the path points to the file inside the web site
| string | $filePath |
Definition at line 1108 of file class.indexer.php.
References t3lib_div\resolveBackPath().
| static tx_indexedsearch_indexer::isRelativeURL | ( | $ | url | ) | [static, protected] |
Checks if URL is relative.
| string | $url |
Definition at line 1097 of file class.indexer.php.
| tx_indexedsearch_indexer::log_pull | ( | ) |
Pull function wrapper for TT logging
Definition at line 2159 of file class.indexer.php.
References $GLOBALS.
Referenced by hook_indexContent(), indexRegularDocument(), and indexTypo3PageContent().
| tx_indexedsearch_indexer::log_push | ( | $ | msg, |
| $ | key | ||
| ) |
Push function wrapper for TT logging
| string | Title to set |
| string | Key (?) |
Definition at line 2150 of file class.indexer.php.
References $GLOBALS.
Referenced by hook_indexContent(), indexRegularDocument(), and indexTypo3PageContent().
| tx_indexedsearch_indexer::log_setTSlogMessage | ( | $ | msg, |
| $ | errorNum = 0 |
||
| ) |
Set log message function wrapper for TT logging
| string | Message to set |
| integer | Error number |
Definition at line 2170 of file class.indexer.php.
References $GLOBALS.
Referenced by checkMtimeTstamp(), checkWordList(), extractLinks(), hook_indexContent(), indexRegularDocument(), indexTypo3PageContent(), removeLoginpagesWithContentHash(), and update_grlist().
| tx_indexedsearch_indexer::md5inthash | ( | $ | str | ) |
md5 integer hash. Using 7 instead of 8 just because that makes the integers lower than 32 bit (28 bit) and so they do not interfere with UNSIGNED integers or PHP-versions which have varying output from the hexdec function.
| string | String to hash |
Definition at line 2123 of file class.indexer.php.
Referenced by indexRegularDocument(), indexTypo3PageContent(), removeLoginpagesWithContentHash(), setExtHashes(), setT3Hashes(), submit_grlist(), submitFile_grlist(), and update_grlist().
| tx_indexedsearch_indexer::metaphone | ( | $ | word, |
| $ | retRaw = FALSE |
||
| ) |
Creating metaphone based hash from input word
| string | Word to convert |
| boolean | If set, returns the raw metaphone value (not hashed) |
Definition at line 1429 of file class.indexer.php.
Referenced by analyzeBody(), and analyzeHeaderinfo().
| tx_indexedsearch_indexer::processWordsInArrays | ( | $ | contentArr | ) |
Processing words in the array from split*Content -functions
| array | Array of content to index, see splitHTMLContent() and splitRegularContent() |
Definition at line 1326 of file class.indexer.php.
References $value.
Referenced by indexRegularDocument(), and indexTypo3PageContent().
| tx_indexedsearch_indexer::readFileContent | ( | $ | ext, |
| $ | absFile, | ||
| $ | cPKey | ||
| ) |
Reads the content of an external file being indexed. The content from the external parser MUST be returned in utf-8!
| string | File extension, eg. "pdf", "doc" etc. |
| string | Absolute filename of file (must exist and be validated OK before calling function) |
| string | Pointer to section (zero for all other than PDF which will have an indication of pages into which the document should be split.) |
Definition at line 1236 of file class.indexer.php.
Referenced by indexRegularDocument().
| tx_indexedsearch_indexer::removeLoginpagesWithContentHash | ( | ) |
Removes any indexed pages with userlogins which have the same contentHash. NOT USED anywhere inside this class!
Definition at line 1937 of file class.indexer.php.
References $GLOBALS, $res, $row, log_setTSlogMessage(), md5inthash(), and removeOldIndexedPages().
| tx_indexedsearch_indexer::removeOldIndexedFiles | ( | $ | phash | ) |
Removes records for the indexed page, $phash
| integer | phash value to flush |
Definition at line 1726 of file class.indexer.php.
References $GLOBALS.
Referenced by submitFilePage().
| tx_indexedsearch_indexer::removeOldIndexedPages | ( | $ | phash | ) |
Removes records for the indexed page, $phash
| integer | phash value to flush |
Definition at line 1583 of file class.indexer.php.
References $GLOBALS.
Referenced by removeLoginpagesWithContentHash(), and submitPage().
| tx_indexedsearch_indexer::setExtHashes | ( | $ | file, |
| $ | subinfo = array() |
||
| ) |
Get search hash, external files
| string | File name / path which identifies it on the server |
| array | Additional content identifying the (subpart of) content. For instance; PDF files are divided into groups of pages for indexing. |
Definition at line 2099 of file class.indexer.php.
References $hash, and md5inthash().
Referenced by indexRegularDocument().
| tx_indexedsearch_indexer::setT3Hashes | ( | ) |
Get search hash, T3 pages
Definition at line 2073 of file class.indexer.php.
References md5inthash().
Referenced by init().
| tx_indexedsearch_indexer::splitHTMLContent | ( | $ | content | ) |
Splits HTML content and returns an associative array, with title, a list of metatags, and a list of words in the body.
| string | HTML content to index. To some degree expected to be made by TYPO3 (i.e. splitting the header by ":") |
Definition at line 610 of file class.indexer.php.
References $content, addSpacesToKeywordList(), embracingTags(), t3lib_div\get_tag_attributes(), and typoSearchTags().
Referenced by indexTypo3PageContent().
| tx_indexedsearch_indexer::splitRegularContent | ( | $ | content | ) |
Splits non-HTML content (from external files for instance)
| string | Input content (non-HTML) to index. |
Definition at line 1271 of file class.indexer.php.
References $content.
| tx_indexedsearch_indexer::submit_grlist | ( | $ | hash, |
| $ | phash_x | ||
| ) |
Stores gr_list in the database.
| integer | Search result record phash |
| integer | Actual phash of current content |
Definition at line 1545 of file class.indexer.php.
References $GLOBALS, $hash, and md5inthash().
Referenced by submitFile_grlist(), submitPage(), and update_grlist().
| tx_indexedsearch_indexer::submit_section | ( | $ | hash, |
| $ | hash_t3 | ||
| ) |
Stores section. $hash and $hash_t3 are the same for TYPO3 pages, but different when it is external files.
| integer | phash of TYPO3 parent search result record |
| integer | phash of the file indexation search record |
Definition at line 1565 of file class.indexer.php.
References $GLOBALS, $hash, and getRootLineFields().
Referenced by submitFile_section(), and submitPage().
| tx_indexedsearch_indexer::submitFile_grlist | ( | $ | hash | ) |
Stores file gr_list for a file IF it does not exist already
| integer | phash value of file |
Definition at line 1692 of file class.indexer.php.
References $GLOBALS, $hash, md5inthash(), and submit_grlist().
| tx_indexedsearch_indexer::submitFile_section | ( | $ | hash | ) |
Stores file section for a file IF it does not exist
| integer | phash value of file |
Definition at line 1712 of file class.indexer.php.
References $GLOBALS, $hash, $res, and submit_section().
Referenced by indexRegularDocument().
| tx_indexedsearch_indexer::submitFilePage | ( | $ | hash, |
| $ | file, | ||
| $ | subinfo, | ||
| $ | ext, | ||
| $ | mtime, | ||
| $ | ctime, | ||
| $ | size, | ||
| $ | content_md5h, | ||
| $ | contentParts | ||
| ) |
Updates db with information about the file
| array | Array with phash and phash_grouping keys for file |
| string | File name |
| array | Array of "cHashParams" for files: This is for instance the page index for a PDF file (other document types it will be a zero) |
| string | File extension determining the type of media. |
| integer | Modification time of file. |
| integer | Creation time of file. |
| integer | Size of file in bytes |
| integer | Content HASH value. |
| array | Standard content array (using only title and body for a file) |
Definition at line 1626 of file class.indexer.php.
References $content_md5h, $contentParts, $GLOBALS, $hash, bodyDescription(), and removeOldIndexedFiles().
Referenced by indexRegularDocument().
| tx_indexedsearch_indexer::submitPage | ( | ) |
Updates db with information about the page (TYPO3 page, not external media)
Definition at line 1471 of file class.indexer.php.
References $GLOBALS, bodyDescription(), removeOldIndexedPages(), submit_grlist(), and submit_section().
Referenced by indexTypo3PageContent().
| tx_indexedsearch_indexer::submitWords | ( | $ | wl, |
| $ | phash | ||
| ) |
Submits RELATIONS between words and phash
| array | Word list array |
| integer | phash value |
Definition at line 2016 of file class.indexer.php.
References $GLOBALS, and freqMap().
Referenced by indexRegularDocument(), and indexTypo3PageContent().
| tx_indexedsearch_indexer::typoSearchTags | ( | &$ | body | ) |
Removes content that shouldn't be indexed according to TYPO3SEARCH-tags.
| string | HTML Content, passed by reference |
Definition at line 732 of file class.indexer.php.
Referenced by splitHTMLContent().
| tx_indexedsearch_indexer::update_grlist | ( | $ | phash, |
| $ | phash_x | ||
| ) |
Check if a grlist-entry for this hash exists and, if not, write one.
| integer | phash of the search result that should be found |
| integer | The real phash of the current content. The two values are different when a page with userlogin turns out to contain the exact same content as another already indexed version of the page; This is the whole reason for the grlist table in fact... |
Definition at line 1845 of file class.indexer.php.
References $GLOBALS, $res, log_setTSlogMessage(), md5inthash(), and submit_grlist().
Referenced by indexTypo3PageContent().
| tx_indexedsearch_indexer::updateParsetime | ( | $ | phash, |
| $ | parsetime | ||
| ) |
Update parsetime for phash row.
| integer | phash value. |
| integer | Parsetime value to set. |
Definition at line 1890 of file class.indexer.php.
References $GLOBALS.
Referenced by indexRegularDocument(), and indexTypo3PageContent().
| tx_indexedsearch_indexer::updateRootline | ( | ) |
Update section rootline for the page
Definition at line 1903 of file class.indexer.php.
References $GLOBALS, and getRootLineFields().
Referenced by indexTypo3PageContent().
| tx_indexedsearch_indexer::updateSetId | ( | $ | phash | ) |
Update SetID of the index_phash record.
| integer | phash value |
Definition at line 1875 of file class.indexer.php.
References $GLOBALS.
Referenced by indexTypo3PageContent().
| tx_indexedsearch_indexer::updateTstamp | ( | $ | phash, |
| $ | mtime = 0 |
||
| ) |
Update tstamp for a phash row.
| integer | phash value |
| integer | If set, update the mtime field to this value. |
Definition at line 1860 of file class.indexer.php.
References $GLOBALS.
Referenced by checkMtimeTstamp(), indexRegularDocument(), and indexTypo3PageContent().
| tx_indexedsearch_indexer::$cHashParams = array() |
Definition at line 184 of file class.indexer.php.
| tx_indexedsearch_indexer::$conf = array() |
Definition at line 175 of file class.indexer.php.
| tx_indexedsearch_indexer::$content_md5h = '' |
Definition at line 180 of file class.indexer.php.
Referenced by checkExternalDocContentHash(), indexRegularDocument(), and submitFilePage().
| tx_indexedsearch_indexer::$contentParts = array() |
Definition at line 179 of file class.indexer.php.
Referenced by indexRegularDocument(), and submitFilePage().
| tx_indexedsearch_indexer::$crawlerActive = FALSE |
Definition at line 163 of file class.indexer.php.
| tx_indexedsearch_indexer::$csObj |
Definition at line 195 of file class.indexer.php.
| tx_indexedsearch_indexer::$defaultContentArray |
array(
'title' => '',
'description' => '',
'keywords' => '',
'body' => '',
)
Definition at line 166 of file class.indexer.php.
| tx_indexedsearch_indexer::$defaultGrList = '0,-1' |
Definition at line 155 of file class.indexer.php.
| tx_indexedsearch_indexer::$excludeSections = 'script,style' |
Definition at line 149 of file class.indexer.php.
| tx_indexedsearch_indexer::$external_parsers = array() |
Definition at line 152 of file class.indexer.php.
| tx_indexedsearch_indexer::$externalFileCounter = 0 |
Definition at line 173 of file class.indexer.php.
| tx_indexedsearch_indexer::$file_phash_arr = array() |
Definition at line 178 of file class.indexer.php.
| tx_indexedsearch_indexer::$forceIndexing = FALSE |
Definition at line 162 of file class.indexer.php.
| tx_indexedsearch_indexer::$freqMax = 0.1 |
Definition at line 187 of file class.indexer.php.
| tx_indexedsearch_indexer::$freqRange = 32000 |
Definition at line 186 of file class.indexer.php.
| tx_indexedsearch_indexer::$hash = array() |
Definition at line 177 of file class.indexer.php.
Referenced by setExtHashes(), submit_grlist(), submit_section(), submitFile_grlist(), submitFile_section(), and submitFilePage().
| tx_indexedsearch_indexer::$indexerConfig = array() |
Definition at line 176 of file class.indexer.php.
Referenced by hook_indexContent().
| tx_indexedsearch_indexer::$indexExternalUrl_content = '' |
Definition at line 182 of file class.indexer.php.
| tx_indexedsearch_indexer::$internal_log = array() |
Definition at line 181 of file class.indexer.php.
| tx_indexedsearch_indexer::$lexerObj |
Definition at line 209 of file class.indexer.php.
| tx_indexedsearch_indexer::$maxExternalFiles = 0 |
Definition at line 160 of file class.indexer.php.
| tx_indexedsearch_indexer::$metaphoneObj |
Definition at line 202 of file class.indexer.php.
| tx_indexedsearch_indexer::$reasons |
array(
-1 => 'mtime matched the document, so no changes detected and no content updated',
-2 => 'The minimum age was not exceeded',
1 => "The configured max-age was exceeded for the document and thus it's indexed.",
2 => 'The minimum age was exceed and mtime was set and the mtime was different, so the page was indexed.',
3 => 'The minimum age was exceed, but mtime was not set, so the page was indexed.',
4 => 'Page has never been indexed (is not represented in the index_phash table).'
)
Definition at line 139 of file class.indexer.php.
| tx_indexedsearch_indexer::$tstamp_maxAge = 0 |
Definition at line 158 of file class.indexer.php.
| tx_indexedsearch_indexer::$tstamp_minAge = 0 |
Definition at line 159 of file class.indexer.php.
| tx_indexedsearch_indexer::$wordcount = 0 |
Definition at line 172 of file class.indexer.php.
1.7.5.1