class.t3lib_readmail.php

Go to the documentation of this file.
00001 <?php
00002 /***************************************************************
00003 *  Copyright notice
00004 *
00005 *  (c) 1999-2008 Kasper Skaarhoj (kasperYYYY@typo3.com)
00006 *  All rights reserved
00007 *
00008 *  This script is part of the TYPO3 project. The TYPO3 project is
00009 *  free software; you can redistribute it and/or modify
00010 *  it under the terms of the GNU General Public License as published by
00011 *  the Free Software Foundation; either version 2 of the License, or
00012 *  (at your option) any later version.
00013 *
00014 *  The GNU General Public License can be found at
00015 *  http://www.gnu.org/copyleft/gpl.html.
00016 *  A copy is found in the textfile GPL.txt and important notices to the license
00017 *  from the author is found in LICENSE.txt distributed with these scripts.
00018 *
00019 *
00020 *  This script is distributed in the hope that it will be useful,
00021 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00022 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00023 *  GNU General Public License for more details.
00024 *
00025 *  This copyright notice MUST APPEAR in all copies of the script!
00026 ***************************************************************/
00027 /**
00028  * Contains a class with functions used to read email content
00029  *
00030  * $Id: class.t3lib_readmail.php 3439 2008-03-16 19:16:51Z flyguide $
00031  * Revised for TYPO3 3.6 May 2003 by Kasper Skaarhoj
00032  *
00033  * @author  Kasper Skaarhoj <kasperYYYY@typo3.com>
00034  */
00035 /**
00036  * [CLASS/FUNCTION INDEX of SCRIPT]
00037  *
00038  *
00039  *
00040  *   83: class t3lib_readmail
00041  *
00042  *              SECTION: General
00043  *  113:     function getMessage($mailParts)
00044  *  138:     function getTextContent($content)
00045  *  153:     function getMailBoundaryParts($boundary,$content)
00046  *  173:     function getCType($str)
00047  *  196:     function analyseReturnError($c)
00048  *  251:     function decodeHeaderString($str)
00049  *  279:     function extractNameEmail($str)
00050  *  308:     function getContentTypeData($contentTypeStr)
00051  *  331:     function makeUnixDate($dateStr)
00052  *  354:     function getGMToffset($GMT)
00053  *  368:     function extractMailHeader($content,$limit=0)
00054  *  399:     function fullParse($content)
00055  *
00056  * TOTAL FUNCTIONS: 12
00057  * (This index is automatically created/updated by the extension "extdeveval")
00058  *
00059  */
00060 
00061 
00062 
00063 
00064 
00065 
00066 
00067 
00068 
00069 
00070 
00071 
00072 
00073 
00074 
00075 /**
00076  * Functions used to read email content
00077  * The class is still just a bunch of miscellaneous functions used to read content out of emails
00078  *
00079  * @author  Kasper Skaarhoj <kasperYYYY@typo3.com>
00080  * @package TYPO3
00081  * @subpackage t3lib
00082  */
class t3lib_readmail {
    /**
     * Maps upper-cased three-letter English month abbreviations, as they appear
     * in mail 'Date' headers, to month numbers. Used by makeUnixDate().
     */
    var $dateAbbrevs = array(
        'JAN' => 1,
        'FEB' => 2,
        'MAR' => 3,
        'APR' => 4,
        'MAY' => 5,
        'JUN' => 6,
        'JUL' => 7,
        'AUG' => 8,
        'SEP' => 9,
        'OCT' => 10,
        'NOV' => 11,
        'DEC' => 12
    );

    /**
     * Offset of the server from GMT in minutes; subtracted in getGMToffset().
     * The default of 60 equals +0100 (CET).
     */
    var $serverGMToffsetMinutes = 60;

    /*******************************
     *
     * General
     *
     ********************************/

    /**
     * Returns the text content of a mail which has previously been parsed by eg. extractMailHeader()
     * Probably obsolete since the function fullParse() is more advanced and safer to use.
     *
     * For a multipart mail (content-type carries a boundary) the body of the first
     * part is returned. Otherwise the content is passed through getTextContent().
     *
     * @param   array       Output from extractMailHeader() (keys 'content-type' and 'CONTENT' are read)
     * @return  string      The content.
     */
    function getMessage($mailParts) {
        $content = isset($mailParts['CONTENT']) ? $mailParts['CONTENT'] : '';
        if (empty($mailParts['content-type'])) {
            return $content;
        }

        $CType = $this->getCType($mailParts['content-type']);
        if (!empty($CType['boundary'])) {
            $parts = $this->getMailBoundaryParts($CType['boundary'], $content);
            return isset($parts[0]) ? $this->getTextContent($parts[0]) : '';
        }

            // Header and body MUST be separated by an empty line. Without it extractMailHeader()
            // would treat the first paragraph of the body as continuation lines of the header.
        return $this->getTextContent(
            'Content-Type: ' . $mailParts['content-type'] . chr(10) . chr(10) . $content
        );
    }

    /**
     * Returns the body part of a raw mail message (including headers)
     * Probably obsolete since the function fullParse() is more advanced and safer to use.
     * No decoding of the body is done here.
     *
     * @param   string      Raw mail content
     * @return  string      Body of message
     */
    function getTextContent($content) {
        $p = $this->extractMailHeader($content);
        return $p['CONTENT'];
    }

    /**
     * Splits the body of a mail into parts based on the boundary string given.
     * Obsolete, use fullParse()
     *
     * Whatever precedes the first delimiter is dropped. Collecting stops at the
     * closing delimiter ("--" . boundary . "--"), also when text follows it.
     *
     * @param   string      Boundary string used to split the content.
     * @param   string      BODY section of a mail
     * @return  array       Parts of the mail based on this (left-trimmed, numerically indexed from 0)
     */
    function getMailBoundaryParts($boundary, $content) {
        $mParts = explode('--' . $boundary, (string)$content);
            // First element is the text before the first delimiter:
        array_shift($mParts);

        $new = array();
        foreach ($mParts as $val) {
                // A piece starting with "--" is what follows the closing delimiter:
            if (trim($val) == '--' || substr($val, 0, 2) == '--') {
                break;
            }
            $new[] = ltrim($val);
        }
        return $new;
    }

    /**
     * Returns Content Type plus more.
     * Obsolete, use fullParse()
     *
     * The first ";"-separated piece is returned in the key 'ContentType' (untrimmed).
     * Every following "key=value" piece is returned with the lowercased, trimmed key
     * and the value stripped of one surrounding pair of double quotes. Pieces without
     * "=" are appended with numeric keys.
     *
     * @param   string      "ContentType" string with more
     * @return  array       parts in key/value pairs
     * @ignore
     */
    function getCType($str) {
        $parts = explode(';', (string)$str);
        $cTypes = array();
        $cTypes['ContentType'] = array_shift($parts);

        foreach ($parts as $ppstr) {
            $mparts = explode('=', $ppstr, 2);
            if (count($mparts) > 1) {
                $value = preg_replace('/"$/D', '', trim($mparts[1]));
                $value = preg_replace('/^"/', '', trim($value));
                $cTypes[strtolower(trim($mparts[0]))] = $value;
            } else {
                $cTypes[] = $ppstr;
            }
        }
        return $cTypes;
    }

    /**
     * Analyses the return-mail content for the Dmailer module - used to find what reason there was for rejecting the mail
     * Used by the Dmailer, but not exclusively.
     *
     * The mail server is guessed from marker strings in the text (Qmail, Postfix, otherwise 'unknown')
     * and the reason code from keywords in the reason text. A reason of -1 means "not recognized".
     *
     * @param   string      message body/text
     * @return  array       key/value pairs with analysis result: "content", "reason_text", "mailserver" and "reason"
     */
    function analyseReturnError($c) {
        $c = (string)$c;
        $cp = array();
        $qmailDivider = '--- Below this line is a copy of the message.';

        if (strpos($c, $qmailDivider) !== false) {     // QMAIL
                // Only the part before the QMAIL divider is of interest:
            list($c) = explode($qmailDivider, $c);
            $cp['content'] = trim($c);
            $parts = explode('>:', $c, 2);
            $cp['reason_text'] = isset($parts[1]) ? trim($parts[1]) : '';
            $cp['mailserver'] = 'Qmail';
            if (preg_match('/550|no mailbox|account does not exist/i', $cp['reason_text'])) {
                $cp['reason'] = 550;    // 550 Invalid recipient
            } elseif (stristr($cp['reason_text'], 'couldn\'t find any host named') !== false) {
                $cp['reason'] = 2;      // Bad host
            } elseif (preg_match('/Error in Header|invalid Message-ID header/i', $cp['reason_text'])) {
                $cp['reason'] = 554;
            } else {
                $cp['reason'] = -1;
            }
        } elseif (strpos($c, 'The Postfix program') !== false) {       // Postfix
            $cp['content'] = trim($c);
            $parts = explode('>:', $c, 2);
            $cp['reason_text'] = isset($parts[1]) ? trim($parts[1]) : '';
            $cp['mailserver'] = 'Postfix';
            if (stristr($cp['reason_text'], '550') !== false) {
                $cp['reason'] = 550;    // 550 Invalid recipient, User unknown
            } elseif (stristr($cp['reason_text'], '553') !== false) {
                $cp['reason'] = 553;    // No such user
            } elseif (stristr($cp['reason_text'], '551') !== false) {
                $cp['reason'] = 551;    // Mailbox full
            } else {
                $cp['reason'] = -1;
            }
        } else {    // No-named:
            $cp['content'] = trim($c);
                // Only the first 1000 characters are searched for keywords:
            $cp['reason_text'] = trim(substr($c, 0, 1000));
            $cp['mailserver'] = 'unknown';
            if (preg_match('/Unknown Recipient|Delivery failed 550|Receiver not found|User not listed|recipient problem|Delivery to the following recipients failed|User unknown|recipient name is not recognized/i', $cp['reason_text'])) {
                $cp['reason'] = 550;    // 550 Invalid recipient, User unknown
            } elseif (preg_match('/over quota|mailbox full/i', $cp['reason_text'])) {
                $cp['reason'] = 551;
            } elseif (preg_match('/Error in Header/i', $cp['reason_text'])) {
                $cp['reason'] = 554;
            } else {
                $cp['reason'] = -1;
            }
        }

        return $cp;
    }

    /**
     * Decodes a header-string with the =?....?= syntax including base64/quoted-printable encoding.
     * The charset named in the encoded section is NOT evaluated; the decoded bytes are returned as they are.
     * A "=?" which is not followed by a "charset?encoding?content" structure leaves the string untouched.
     *
     * @param   string      A string (encoded or not) from a mail header, like sender name etc.
     * @return  string      The input string, but with the parts in =?....?= decoded.
     */
    function decodeHeaderString($str) {
        $str = (string)$str;
        $parts = explode('=?', $str, 2);
        if (count($parts) == 2) {
                // $fields = charset, encoding type, encoded content (plus the rest of the string)
            $fields = explode('?', $parts[1], 3);
            if (count($fields) < 3) {
                return $str;
            }
            $subparts = explode('?=', $fields[2], 2);
            $encContent = $subparts[0];

            switch (strtolower($fields[1])) {
                case 'q':
                        // "_" means space and must be substituted BEFORE decoding, otherwise
                        // an underscore encoded as "=5F" would be turned into a space as well.
                    $encContent = quoted_printable_decode(str_replace('_', ' ', $encContent));
                break;
                case 'b':
                    $encContent = base64_decode($encContent);
                break;
            }

                // Calls decodeHeaderString recursively for any subsequent encoded section.
            $rest = isset($subparts[1]) ? $this->decodeHeaderString($subparts[1]) : '';
            $parts[1] = $encContent . $rest;
        }
        return implode('', $parts);
    }

    /**
     * Extracts name/email parts from a header field (like 'To:' or 'From:' with name/email mixed up.
     *
     * If the whole string is a valid email it is returned as the email. Otherwise the email is
     * taken from the first <...> section and the name is what stands before it; a part in
     * double quotes takes precedence as the name.
     *
     * @param   string      Value from a header field containing name/email values.
     * @return  array       Array with the name and email in. Email is validated, otherwise not set.
     */
    function extractNameEmail($str) {
        $outArr = array();

            // Email:
        $reg = array();
        preg_match('/<([^>]*)>/', $str, $reg);
        if (t3lib_div::validEmail($str)) {
            $outArr['email'] = $str;
        } elseif (!empty($reg[1]) && t3lib_div::validEmail($reg[1])) {
            $outArr['email'] = $reg[1];
                // Find name:
            list($namePart) = explode($reg[0], $str);
            if (trim($namePart)) {
                $nameReg = array();
                preg_match('/"([^"]*)"/', $str, $nameReg);
                if (isset($nameReg[1]) && trim($nameReg[1])) {
                    $outArr['name'] = trim($nameReg[1]);
                } else {
                    $outArr['name'] = trim($namePart);
                }
            }
        }
        return $outArr;
    }

    /**
     * Returns the data from the 'content-type' field. That is the boundary, charset and mime-type
     *
     * The first ";"-separated value is returned in '_MIME_TYPE'. All following values of the
     * form key="value" or key=value are returned with the lowercased key.
     *
     * @param   string      "Content-type-string"
     * @return  array       key/value pairs with the result.
     */
    function getContentTypeData($contentTypeStr) {
        $outValue = array();
        $cTypeParts = t3lib_div::trimExplode(';', $contentTypeStr, true);
            // content type, first value is supposed to be the mime-type, whatever after the first is something else.
        $outValue['_MIME_TYPE'] = isset($cTypeParts[0]) ? $cTypeParts[0] : NULL;

        foreach (array_slice($cTypeParts, 1) as $v) {
            $reg = array();
            if (preg_match('/([^=]*)="(.*)"/s', $v, $reg)) {
                $value = $reg[2];
            } elseif (preg_match('/^([^=]*)=(.*)$/Ds', $v, $reg)) {
                    // Unquoted values like "charset=utf-8" are valid as well:
                $value = trim($reg[2]);
            } else {
                continue;
            }
            $key = strtolower(trim($reg[1]));
            if (strlen($key) && strlen(trim($value))) {
                $outValue[$key] = $value;
            }
        }
        return $outValue;
    }

    /**
     * Makes a UNIX-date based on the timestamp in the 'Date' header field.
     *
     * Expects the form "[weekday,] day month year hh:mm[:ss] [+-hhmm]", month being one of the
     * keys of $this->dateAbbrevs. The timestamp is made with mktime() and then corrected by the
     * offset from getGMToffset().
     *
     * @param   string      String with a timestamp according to email standards.
     * @return  integer     The timestamp converted to unix-time in seconds and compensated for GMT/CET ($this->serverGMToffsetMinutes); 0 (zero) if the string could not be parsed.
     */
    function makeUnixDate($dateStr) {
            // Strip the weekday if there is one:
        $dateParts = explode(',', (string)$dateStr);
        $dateStr = count($dateParts) > 1 ? $dateParts[1] : $dateParts[0];

            // $spaceParts = day, month name, year, time, GMT offset
        $spaceParts = t3lib_div::trimExplode(' ', $dateStr, true);
        if (count($spaceParts) < 4) {
            return 0;
        }
        $monthName = strtoupper($spaceParts[1]);
        if (!isset($this->dateAbbrevs[$monthName])) {
            return 0;
        }

        $timeParts = explode(':', $spaceParts[3]);
        $timeStamp = mktime(
            intval($timeParts[0]),
            isset($timeParts[1]) ? intval($timeParts[1]) : 0,
            isset($timeParts[2]) ? intval($timeParts[2]) : 0,
            $this->dateAbbrevs[$monthName],
            intval($spaceParts[0]),
            intval($spaceParts[2])
        );

        $offset = $this->getGMToffset(isset($spaceParts[4]) ? $spaceParts[4] : '');
        $timeStamp -= ($offset * 60);   // Compensates for GMT by subtracting the number of seconds which the date is offset from serverTime

        return $timeStamp;
    }

    /**
     * Parsing the GMT offset value from a mail timestamp.
     * Anything not starting with "+" is regarded as a negative offset; parts of the
     * value which are not numeric count as zero (so eg. "GMT" or an empty string is offset zero).
     *
     * @param   string      A string like "+0100" or so.
     * @return  integer     Minutes to offset the timestamp (offset of the mail minus $this->serverGMToffsetMinutes)
     * @access private
     */
    function getGMToffset($GMT) {
        $GMT = (string)$GMT;
            // Explicit integer conversion: arithmetics on non-numeric strings is an error in recent PHP versions.
        $GMToffset = intval(substr($GMT, 1, 2)) * 60 + intval(substr($GMT, 3, 2));
        if (substr($GMT, 0, 1) != '+') {
            $GMToffset = -$GMToffset;
        }
        return $GMToffset - $this->serverGMToffsetMinutes;
    }

    /**
     * This returns the mail header items in an array with associative keys and the mail body part in another CONTENT field
     *
     * Header names are lowercased. If a header occurs more than once the last value is found in the key
     * of the header name and the previous values in an array in the key with a "." appended to the name.
     * Lines in the header which do not begin a new header are appended (with a space) to the previous header.
     *
     * @param   string      Raw mail content
     * @param   integer     A safety limit that will put a upper length to how many header chars will be processed. Set to zero means that there is no limit. (Uses a simple substr() to limit the amount of mail data to process to avoid run-away)
     * @return  array       An array where each key/value pair is a header-key/value pair. The mail BODY is returned in the key 'CONTENT' if $limit is not set!
     */
    function extractMailHeader($content, $limit = 0) {
        $content = (string)$content;
        if ($limit) {
            $content = substr($content, 0, $limit);
        }

        $lines = explode(chr(10), ltrim($content));
        $headers = array();
        $p = '';
        $bodyStart = count($lines);
        foreach ($lines as $k => $str) {
            if (trim($str) === '') {   // header finished
                $bodyStart = $k;
                break;
            }
            $parts = explode(' ', $str, 2);
                // A line beginning with whitespace always continues the previous header, even if its first word ends with ":"
            $isContinuation = ($str[0] == ' ' || $str[0] == chr(9));
            if (!$isContinuation && $parts[0] && substr($parts[0], -1) == ':') {
                $p = strtolower(substr($parts[0], 0, -1));
                if (isset($headers[$p])) {
                        // Header seen before: keep the former value in the "[name]." array
                    $headers[$p . '.'][] = $headers[$p];
                }
                $headers[$p] = isset($parts[1]) ? trim($parts[1]) : '';
            } else {
                if (!isset($headers[$p])) {
                    $headers[$p] = '';
                }
                $headers[$p] .= ' ' . trim($str);
            }
        }
        if (!$limit) {
            $headers['CONTENT'] = ltrim(implode(chr(10), array_slice($lines, $bodyStart)));
        }
        return $headers;
    }

    /**
     * The extended version of the extractMailHeader() which will also parse all the content body into an array and further process the header fields and decode content etc. Returns every part of the mail ready to go.
     *
     * Added keys: '_FROM', '_TO', '_REPLY-TO', '_SENDER', '_RETURN-PATH' (from extractNameEmail(), if the header exists),
     * '_DATE' (from makeUnixDate()) and '_CONTENT_TYPE_DAT' (from getContentTypeData()).
     * For multipart mails 'CONTENT' becomes an array with the result of fullParse() for each part.
     *
     * @param   string      Raw email input.
     * @return  array       Multidimensional array with all parts of the message organized nicely. Use t3lib_div::debug() to analyse it visually.
     */
    function fullParse($content) {
            // *************************
            // PROCESSING the HEADER part of the mail
            // *************************

            // Splitting header and body of mail:
        $mailParts = $this->extractMailHeader($content);

            // Decoding header values which potentially can be encoded by =?...?=
        foreach (array('subject', 'thread-topic', 'from', 'to') as $headerType) {
            if (isset($mailParts[$headerType])) {
                $mailParts[$headerType] = $this->decodeHeaderString($mailParts[$headerType]);
            }
        }
            // Separating email/names from header fields which can contain email addresses.
        foreach (array('from', 'to', 'reply-to', 'sender', 'return-path') as $headerType) {
            if (isset($mailParts[$headerType])) {
                $mailParts['_' . strtoupper($headerType)] = $this->extractNameEmail($mailParts[$headerType]);
            }
        }
            // Decode date from human-readable format to unix-time (includes compensation for GMT CET)
            // Nested parts of a multipart mail normally have no date header.
        $mailParts['_DATE'] = $this->makeUnixDate(isset($mailParts['date']) ? $mailParts['date'] : '');

            // Transfer encodings of body content
        $transferEncoding = isset($mailParts['content-transfer-encoding']) ? strtolower($mailParts['content-transfer-encoding']) : '';
        switch ($transferEncoding) {
            case 'quoted-printable':
                $mailParts['CONTENT'] = quoted_printable_decode($mailParts['CONTENT']);
            break;
            case 'base64':
                $mailParts['CONTENT'] = base64_decode($mailParts['CONTENT']);
            break;
        }

            // Content types
        $mailParts['_CONTENT_TYPE_DAT'] = $this->getContentTypeData(isset($mailParts['content-type']) ? $mailParts['content-type'] : '');
        $cTypeData = $mailParts['_CONTENT_TYPE_DAT'];


            // *************************
            // PROCESSING the CONTENT part of the mail (the body)
            // *************************

            // only looking for 'multipart' in string (multipart/mixed, multipart/related, multipart/alternative, multipart/signed ...)
        $cType = strtolower((string)$cTypeData['_MIME_TYPE']);
        if (strncmp($cType, 'multipart', 9) == 0) {
            if (isset($cTypeData['boundary']) && strlen($cTypeData['boundary'])) {
                    // NOTE(review): if there is text before the first boundary it ends up as the first "part" here.
                    // Left as it is since changing it would alter the indexes of the parts.
                $contentSectionParts = t3lib_div::trimExplode('--' . $cTypeData['boundary'], $mailParts['CONTENT'], true);
                $contentSectionParts_proc = array();

                foreach ($contentSectionParts as $k => $v) {
                        // "--" is what follows the closing boundary:
                    if (substr($v, 0, 2) == '--') {
                        break;
                    }
                    $contentSectionParts_proc[$k] = $this->fullParse($v);
                }
                $mailParts['CONTENT'] = $contentSectionParts_proc;
            } else {
                $mailParts['CONTENT'] = 'ERROR: No boundary found.';
            }
        } elseif (isset($cTypeData['charset']) && strtolower($cTypeData['charset']) == 'utf-8') {
                // NOTE(review): utf8_decode() is deprecated as of PHP 8.2 - a replacement must be found when moving to that version.
            $mailParts['CONTENT'] = utf8_decode($mailParts['CONTENT']);
        }
        return $mailParts;
    }
}
00469 
00470 if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_readmail.php'])  {
00471     include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_readmail.php']);
00472 }
00473 ?>

Generated on Sat Jan 3 04:23:27 2009 for TYPO3 API by  doxygen 1.4.7