TYPO3 API  SVNRelease
class.t3lib_readmail.php
Go to the documentation of this file.
00001 <?php
00002 /***************************************************************
00003  *  Copyright notice
00004  *
00005  *  (c) 1999-2011 Kasper Skårhøj (kasperYYYY@typo3.com)
00006  *  All rights reserved
00007  *
00008  *  This script is part of the TYPO3 project. The TYPO3 project is
00009  *  free software; you can redistribute it and/or modify
00010  *  it under the terms of the GNU General Public License as published by
00011  *  the Free Software Foundation; either version 2 of the License, or
00012  *  (at your option) any later version.
00013  *
00014  *  The GNU General Public License can be found at
00015  *  http://www.gnu.org/copyleft/gpl.html.
00016  *  A copy is found in the textfile GPL.txt and important notices to the license
00017  *  from the author is found in LICENSE.txt distributed with these scripts.
00018  *
00019  *
00020  *  This script is distributed in the hope that it will be useful,
00021  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00022  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00023  *  GNU General Public License for more details.
00024  *
00025  *  This copyright notice MUST APPEAR in all copies of the script!
00026  ***************************************************************/
00027 /**
00028  * Contains a class with functions used to read email content
00029  *
00030  * $Id: class.t3lib_readmail.php 10121 2011-01-18 20:15:30Z ohader $
00031  * Revised for TYPO3 3.6 May 2003 by Kasper Skårhøj
00032  *
00033  * @author  Kasper Skårhøj <kasperYYYY@typo3.com>
00034  */
00035 /**
00036  * [CLASS/FUNCTION INDEX of SCRIPT]
00037  *
00038  *
00039  *
00040  *   83: class t3lib_readmail
00041  *
00042  *            SECTION: General
00043  *  113:     function getMessage($mailParts)
00044  *  138:     function getTextContent($content)
00045  *  153:     function getMailBoundaryParts($boundary,$content)
00046  *  173:     function getCType($str)
00047  *  196:     function analyseReturnError($c)
00048  *  251:     function decodeHeaderString($str)
00049  *  279:     function extractNameEmail($str)
00050  *  308:     function getContentTypeData($contentTypeStr)
00051  *  331:     function makeUnixDate($dateStr)
00052  *  354:     function getGMToffset($GMT)
00053  *  368:     function extractMailHeader($content,$limit=0)
00054  *  399:     function fullParse($content)
00055  *
00056  * TOTAL FUNCTIONS: 12
00057  * (This index is automatically created/updated by the extension "extdeveval")
00058  *
00059  */
00060 
00061 
/**
 * Functions used to read email content.
 * The class is still just a bunch of miscellaneous functions used to read content out of emails.
 *
 * The parsing methods rely on the constant LF (line feed) and on the static helpers
 * t3lib_div::trimExplode() and t3lib_div::validEmail(), all of which are defined outside this file.
 *
 * @author  Kasper Skårhøj <kasperYYYY@typo3.com>
 * @package TYPO3
 * @subpackage t3lib
 */
class t3lib_readmail {

    /**
     * Maps upper-cased three-letter month abbreviations, as used in mail "Date" headers,
     * to month numbers (1-12).
     *
     * @var array
     */
    public $dateAbbrevs = array(
        'JAN' => 1,
        'FEB' => 2,
        'MAR' => 3,
        'APR' => 4,
        'MAY' => 5,
        'JUN' => 6,
        'JUL' => 7,
        'AUG' => 8,
        'SEP' => 9,
        'OCT' => 10,
        'NOV' => 11,
        'DEC' => 12
    );

    /**
     * Offset in minutes that getGMToffset() subtracts from the offset found in a mail timestamp.
     *
     * @var integer
     */
    public $serverGMToffsetMinutes = 60; // = +0100 (CET)

    /*******************************
     *
     * General
     *
     ********************************/

    /**
     * Returns the text content of a mail which has previously been parsed by eg. extractMailHeader()
     * Probably obsolete since the function fullParse() is more advanced and safer to use.
     *
     * If the mail has a boundary, the body of the first boundary part is returned. Otherwise the
     * body is wrapped in a minimal "Content-Type" header block and passed through getTextContent().
     *
     * @param   array   $mailParts  Output from extractMailHeader()
     * @return  string              The content.
     */
    public function getMessage($mailParts) {
        $body = isset($mailParts['CONTENT']) ? $mailParts['CONTENT'] : '';
        if (empty($mailParts['content-type'])) {
            return $body;
        }

        $cType = $this->getCType($mailParts['content-type']);
        if (!empty($cType['boundary'])) {
            $parts = $this->getMailBoundaryParts($cType['boundary'], $body);

            return isset($parts[0]) ? $this->getTextContent($parts[0]) : '';
        }

            // The header must be separated from the body by an EMPTY line. With only a single
            // line break (or an indented line) extractMailHeader() treats the first paragraph of
            // the body as a continuation of the Content-Type header and drops it from the result.
        return $this->getTextContent('Content-Type: ' . $mailParts['content-type'] . LF . LF . $body);
    }

    /**
     * Returns the body part of a raw mail message (including headers)
     * Probably obsolete since the function fullParse() is more advanced and safer to use.
     *
     * @param   string  $content    Raw mail content
     * @return  string              Body of message
     */
    public function getTextContent($content) {
        $p = $this->extractMailHeader($content);

            // Here some decoding might be needed...
            // However we just return what is believed to be the proper notification:
        return $p['CONTENT'];
    }

    /**
     * Splits the body of a mail into parts based on the boundary string given.
     * Obsolete, use fullParse()
     *
     * Everything before the first boundary marker is discarded and collecting stops at the
     * closing marker (a segment consisting only of "--").
     *
     * @param   string  $boundary   Boundary string used to split the content.
     * @param   string  $content    BODY section of a mail
     * @return  array               Parts of the mail based on this
     */
    public function getMailBoundaryParts($boundary, $content) {
        $mParts = explode('--' . $boundary, $content);
            // Index 0 holds whatever precedes the first boundary marker.
        unset($mParts[0]);

        $new = array();
        foreach ($mParts as $val) {
            if (trim($val) == '--') {
                break;
            }
            $new[] = ltrim($val);
        }

        return $new;
    }

    /**
     * Returns Content Type plus more.
     * Obsolete, use fullParse()
     *
     * The first ';'-separated segment is stored under "ContentType". Each following "key=value"
     * segment is stored under its lower-cased key with one surrounding pair of double quotes
     * removed. Segments without "=" are appended unchanged under numeric keys.
     *
     * @param   string  $str    "ContentType" string with more
     * @return  array           parts in key/value pairs
     * @ignore
     */
    public function getCType($str) {
        $parts = explode(';', (string) $str);
        $cTypes = array();
        $cTypes['ContentType'] = $parts[0];

            // foreach instead of each(): each() was removed in PHP 8.0.
        foreach (array_slice($parts, 1) as $ppstr) {
            $mparts = explode('=', $ppstr, 2);
            if (count($mparts) > 1) {
                $value = preg_replace('/"$/', '', trim($mparts[1]));
                $value = preg_replace('/^"/', '', trim($value));
                $cTypes[strtolower(trim($mparts[0]))] = $value;
            } else {
                $cTypes[] = $ppstr;
            }
        }

        return $cTypes;
    }

    /**
     * Analyses the return-mail content for the Dmailer module - used to find what reason there was for rejecting the mail
     * Used by the Dmailer, but not exclusively.
     *
     * The "reason" value is 550, 551, 553 or 554 when a matching phrase/code is found, 2 for a
     * bad host (Qmail only) and -1 when nothing is recognized.
     *
     * @param   string  $c  message body/text
     * @return  array       key/value pairs with analysis result: "content", "reason_text", "mailserver" and "reason".
     */
    public function analyseReturnError($c) {
        $c = (string) $c;
        $cp = array();
        $qmailDivider = '--- Below this line is a copy of the message.';

        if (strpos($c, $qmailDivider) !== FALSE) { // QMAIL
            list($c) = explode($qmailDivider, $c); // Splits by the QMAIL divider
            $cp['content'] = trim($c);
            $cp['reason_text'] = $this->getBounceReasonText($c);
            $cp['mailserver'] = 'Qmail';
            if (preg_match('/550|no mailbox|account does not exist/i', $cp['reason_text'])) {
                $cp['reason'] = 550; // 550 Invalid recipient
            } elseif (stristr($cp['reason_text'], 'couldn\'t find any host named')) {
                $cp['reason'] = 2; // Bad host
            } elseif (preg_match('/Error in Header|invalid Message-ID header/i', $cp['reason_text'])) {
                $cp['reason'] = 554;
            } else {
                $cp['reason'] = -1;
            }
        } elseif (strpos($c, 'The Postfix program') !== FALSE) { // Postfix
            $cp['content'] = trim($c);
            $cp['reason_text'] = $this->getBounceReasonText($c);
            $cp['mailserver'] = 'Postfix';
            if (stristr($cp['reason_text'], '550')) {
                $cp['reason'] = 550; // 550 Invalid recipient, User unknown
            } elseif (stristr($cp['reason_text'], '553')) {
                $cp['reason'] = 553; // No such user
            } elseif (stristr($cp['reason_text'], '551')) {
                $cp['reason'] = 551; // Mailbox full
            } else {
                $cp['reason'] = -1;
            }
        } else { // No-named:
            $cp['content'] = trim($c);
            $cp['reason_text'] = trim(substr($c, 0, 1000));
            $cp['mailserver'] = 'unknown';
            if (preg_match('/Unknown Recipient|Delivery failed 550|Receiver not found|User not listed|recipient problem|Delivery to the following recipients failed|User unknown|recipient name is not recognized/i', $cp['reason_text'])) {
                $cp['reason'] = 550; // 550 Invalid recipient, User unknown
            } elseif (preg_match('/over quota|mailbox full/i', $cp['reason_text'])) {
                $cp['reason'] = 551;
            } elseif (preg_match('/Error in Header/i', $cp['reason_text'])) {
                $cp['reason'] = 554;
            } else {
                $cp['reason'] = -1;
            }
        }

        return $cp;
    }

    /**
     * Returns the trimmed text following the first ">:" in a bounce message, or an empty string
     * when the message contains no ">:" at all.
     *
     * @param   string  $c  Bounce message text
     * @return  string      The reason text
     */
    protected function getBounceReasonText($c) {
        $parts = explode('>:', $c, 2);

        return isset($parts[1]) ? trim($parts[1]) : '';
    }

    /**
     * Decodes a header-string with the =?....?= syntax including base64/quoted-printable encoding.
     *
     * The decoded bytes are returned as they are; no conversion based on the charset named in the
     * encoded section is performed. A "=?" that is not followed by the "charset?encoding?text"
     * structure is left in the string untouched.
     *
     * @param   string  $str    A string (encoded or not) from a mail header, like sender name etc.
     * @return  string          The input string, but with the parts in =?....?= decoded.
     */
    public function decodeHeaderString($str) {
        $str = (string) $str;
        $parts = explode('=?', $str, 2);
        if (count($parts) !== 2) {
            return $str;
        }

            // Expected layout after "=?": charset ? encoding ? encoded-text ?= rest
        $tokens = explode('?', $parts[1], 3);
        if (count($tokens) < 3) {
            return $str;
        }

        $encType = $tokens[1];
        $subparts = explode('?=', $tokens[2], 2);
        $encContent = $subparts[0];
        $remainder = isset($subparts[1]) ? $subparts[1] : '';

        switch (strtolower($encType)) {
            case 'q':
                    // "_" stands for a space in Q-encoding. It has to be replaced BEFORE decoding,
                    // otherwise a literal underscore transmitted as "=5F" would become a space too.
                $encContent = quoted_printable_decode(str_replace('_', ' ', $encContent));
            break;
            case 'b':
                $encContent = base64_decode($encContent);
            break;
        }

            // Calls decodeHeaderString recursively for any subsequent encoded section.
        return $parts[0] . $encContent . $this->decodeHeaderString($remainder);
    }

    /**
     * Extracts name/email parts from a header field (like 'To:' or 'From:' with name/email mixed up.
     *
     * @param   string  $str    Value from a header field containing name/email values.
     * @return  array           Array with the name and email in. Email is validated, otherwise not set.
     */
    public function extractNameEmail($str) {
        $str = (string) $str;
        $outArr = array();

            // Email: either the whole value or the part enclosed in <...>
        if (t3lib_div::validEmail($str)) {
            $outArr['email'] = $str;

            return $outArr;
        }

        $reg = array();
        if (!preg_match('/<([^>]*)>/', $str, $reg) || !$reg[1] || !t3lib_div::validEmail($reg[1])) {
            return $outArr;
        }
        $outArr['email'] = $reg[1];

            // Find name: the text in front of <...>, preferring a double-quoted section if present
        list($namePart) = explode($reg[0], $str);
        if (trim($namePart)) {
            $quoted = array();
            if (preg_match('/"([^"]*)"/', $str, $quoted) && trim($quoted[1])) {
                $outArr['name'] = trim($quoted[1]);
            } else {
                $outArr['name'] = trim($namePart);
            }
        }

        return $outArr;
    }

    /**
     * Returns the data from the 'content-type' field. That is the boundary, charset and mime-type
     *
     * The mime-type is stored under "_MIME_TYPE"; every following key="value" or key=value
     * parameter is stored under its lower-cased key.
     *
     * @param   string  $contentTypeStr "Content-type-string"
     * @return  array                   key/value pairs with the result.
     */
    public function getContentTypeData($contentTypeStr) {
        $outValue = array();
        $cTypeParts = t3lib_div::trimExplode(';', $contentTypeStr, 1);
            // content type, first value is supposed to be the mime-type, whatever after the first is something else.
        $outValue['_MIME_TYPE'] = isset($cTypeParts[0]) ? $cTypeParts[0] : '';

            // foreach instead of each(): each() was removed in PHP 8.0.
        foreach (array_slice($cTypeParts, 1) as $v) {
            $reg = array();
            if (preg_match('/([^=]*)="(.*)"/i', $v, $reg)) {
                    // Quoted value, eg. boundary="abc"
                $key = $reg[1];
                $value = $reg[2];
            } elseif (preg_match('/^([^=]+)=(.*)$/s', $v, $reg)) {
                    // Unquoted value, eg. charset=utf-8
                $key = trim($reg[1]);
                $value = trim($reg[2]);
            } else {
                continue;
            }
            if (trim($key) && trim($value)) {
                $outValue[strtolower($key)] = $value;
            }
        }

        return $outValue;
    }

    /**
     * Makes a UNIX-date based on the timestamp in the 'Date' header field.
     *
     * Expects the layout "[weekday,] day month year hh:mm[:ss] [+hhmm]". The wall-clock values
     * are passed to mktime() and the result is then shifted by the offset from getGMToffset().
     *
     * @param   string  $dateStr    String with a timestamp according to email standards.
     * @return  integer             The timestamp converted to unix-time in seconds and compensated for GMT/CET ($this->serverGMToffsetMinutes); 0 if the string cannot be parsed.
     */
    public function makeUnixDate($dateStr) {
        $dateParts = explode(',', (string) $dateStr);
        $dateStr = count($dateParts) > 1 ? $dateParts[1] : $dateParts[0];

        $spaceParts = t3lib_div::trimExplode(' ', $dateStr, 1);
        if (count($spaceParts) < 4) {
                // Day, month, year or time is missing (eg. a mail part without Date header).
            return 0;
        }

        $monthKey = strtoupper($spaceParts[1]);
        if (!isset($this->dateAbbrevs[$monthKey])) {
            return 0;
        }

            // Explicit integers: on PHP 8 mktime() rejects non-numeric strings and treats a
            // null argument as "use the current value".
        $timeParts = explode(':', $spaceParts[3]);
        $hour = (int) $timeParts[0];
        $minute = isset($timeParts[1]) ? (int) $timeParts[1] : 0;
        $second = isset($timeParts[2]) ? (int) $timeParts[2] : 0;

        $timeStamp = mktime($hour, $minute, $second, (int) $this->dateAbbrevs[$monthKey], (int) $spaceParts[0], (int) $spaceParts[2]);

        $offset = $this->getGMToffset(isset($spaceParts[4]) ? $spaceParts[4] : '');
            // Compensates for GMT by subtracting the number of seconds which the date is offset from serverTime
        $timeStamp -= ($offset * 60);

        return $timeStamp;
    }

    /**
     * Parsing the GMT offset value from a mail timestamp.
     *
     * A value that does not start with a sign followed by four digits is treated as "+0000".
     *
     * @param   string  $GMT    A string like "+0100" or so.
     * @return  integer         Minutes to offset the timestamp (mail offset minus $this->serverGMToffsetMinutes)
     * @access private
     */
    public function getGMToffset($GMT) {
        $GMToffset = 0;
        $reg = array();
            // Validate before calculating: arithmetic on non-numeric substrings fails on PHP 8.
        if (preg_match('/^([+-])(\d{2})(\d{2})/', (string) $GMT, $reg)) {
            $GMToffset = (int) $reg[2] * 60 + (int) $reg[3];
            if ($reg[1] === '-') {
                $GMToffset = -$GMToffset;
            }
        }

        return $GMToffset - $this->serverGMToffsetMinutes;
    }

    /**
     * This returns the mail header items in an array with associative keys and the mail body part in another CONTENT field
     *
     * A line counts as a new header when its first space-separated token ends with ":"; any other
     * line inside the header section is appended to the previous header. When a header name
     * occurs again, the earlier value is moved to the list stored under the key "<name>.".
     *
     * @param   string  $content    Raw mail content
     * @param   integer $limit      A safety limit that will put a upper length to how many header chars will be processed. Set to zero means that there is no limit. (Uses a simple substr() to limit the amount of mail data to process to avoid run-away)
     * @return  array               An array where each key/value pair is a header-key/value pair. The mail BODY is returned in the key 'CONTENT' if $limit is not set!
     */
    public function extractMailHeader($content, $limit = 0) {
        $content = (string) $content;
        if ($limit) {
            $content = substr($content, 0, $limit);
        }

        $lines = explode(LF, ltrim($content));
        $headers = array();
        $p = '';
        foreach ($lines as $k => $str) {
            if (!trim($str)) {
                break;
            } // header finished
            $parts = explode(' ', $str, 2);
            if ($parts[0] && substr($parts[0], -1) == ':') {
                $p = strtolower(substr($parts[0], 0, -1));
                if (isset($headers[$p])) {
                    $headers[$p . '.'][] = $headers[$p];
                }
                    // A header like "X-Foo:" has no value part at all.
                $headers[$p] = isset($parts[1]) ? trim($parts[1]) : '';
            } else {
                    // Continuation line; may occur before any header was seen ($p is '' then).
                if (!isset($headers[$p])) {
                    $headers[$p] = '';
                }
                $headers[$p] .= ' ' . trim($str);
            }
            unset($lines[$k]);
        }
        if (!$limit) {
            $headers['CONTENT'] = ltrim(implode(LF, $lines));
        }

        return $headers;
    }

    /**
     * The extended version of the extractMailHeader() which will also parse all the content body into an array and further process the header fields and decode content etc. Returns every part of the mail ready to go.
     *
     * Added keys: "_FROM", "_TO", "_REPLY-TO", "_SENDER", "_RETURN-PATH" (name/email arrays for the
     * headers that are present), "_DATE" (unix time, 0 without a parsable Date header) and
     * "_CONTENT_TYPE_DAT". For multipart mails "CONTENT" becomes an array of recursively parsed parts.
     *
     * @param   string  $content    Raw email input.
     * @return  array               Multidimensional array with all parts of the message organized nicely. Use t3lib_utility_Debug::debug() to analyse it visually.
     */
    public function fullParse($content) {
        // *************************
        // PROCESSING the HEADER part of the mail
        // *************************

            // Splitting header and body of mail:
        $mailParts = $this->extractMailHeader($content);

            // Decoding header values which potentially can be encoded by =?...?=
        foreach (array('subject', 'thread-topic', 'from', 'to') as $headerType) {
            if (isset($mailParts[$headerType])) {
                $mailParts[$headerType] = $this->decodeHeaderString($mailParts[$headerType]);
            }
        }
            // Separating email/names from header fields which can contain email addresses.
        foreach (array('from', 'to', 'reply-to', 'sender', 'return-path') as $headerType) {
            if (isset($mailParts[$headerType])) {
                $mailParts['_' . strtoupper($headerType)] = $this->extractNameEmail($mailParts[$headerType]);
            }
        }
            // Decode date from human-readable format to unix-time (includes compensation for GMT CET)
            // Nested multipart sections usually carry no Date header at all.
        $mailParts['_DATE'] = $this->makeUnixDate(isset($mailParts['date']) ? $mailParts['date'] : '');

            // Transfer encodings of body content
        $transferEncoding = isset($mailParts['content-transfer-encoding']) ? strtolower($mailParts['content-transfer-encoding']) : '';
        switch ($transferEncoding) {
            case 'quoted-printable':
                $mailParts['CONTENT'] = quoted_printable_decode($mailParts['CONTENT']);
            break;
            case 'base64':
                $mailParts['CONTENT'] = base64_decode($mailParts['CONTENT']);
            break;
        }

            // Content types
        $mailParts['_CONTENT_TYPE_DAT'] = $this->getContentTypeData(isset($mailParts['content-type']) ? $mailParts['content-type'] : '');


        // *************************
        // PROCESSING the CONTENT part of the mail (the body)
        // *************************

        $cTypeData = $mailParts['_CONTENT_TYPE_DAT'];
            // only looking for 'multipart' in string.
        $isMultipart = substr(strtolower($cTypeData['_MIME_TYPE']), 0, 9) === 'multipart';

        if ($isMultipart) {
            if (!empty($cTypeData['boundary'])) {
                $contentSectionParts = t3lib_div::trimExplode('--' . $cTypeData['boundary'], $mailParts['CONTENT'], 1);
                $contentSectionParts_proc = array();

                foreach ($contentSectionParts as $k => $v) {
                        // A segment starting with "--" is the closing boundary marker.
                    if (substr($v, 0, 2) == '--') {
                        break;
                    }
                    $contentSectionParts_proc[$k] = $this->fullParse($v);
                }
                $mailParts['CONTENT'] = $contentSectionParts_proc;
            } else {
                $mailParts['CONTENT'] = 'ERROR: No boundary found.';
            }
        } elseif (isset($cTypeData['charset']) && strtolower($cTypeData['charset']) == 'utf-8') {
                // NOTE(review): utf8_decode() converts to ISO-8859-1 and is deprecated as of PHP 8.2;
                // kept because callers receive ISO-8859-1 content today.
            $mailParts['CONTENT'] = utf8_decode($mailParts['CONTENT']);
        }

        return $mailParts;
    }
}
00476 
// Outside a defined TYPO3_MODE nothing happens. Otherwise, if the configuration array holds an
// XCLASS entry for this script, the file registered there is included (once).
if (defined('TYPO3_MODE')) {
    if (isset($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_readmail.php'])) {
        include_once $GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_readmail.php'];
    }
}
00480 ?>