php解析mht文件转换成html的实例
php解析mht文件:用编辑器打开mht文件可以看到其中是base64编码的内容,所以mht是可以转换成html的。
- <?php
- /**
- * 针对Mht格式的文件进行解析
- * 使用例子:
- *
- * function mhtmlParseBody($filename) {
- if (file_exists ( $filename )) {
- if (is_dir ( $filename )) return false;
- $filename = strtolower ( $filename );
- if (strpos ( $filename, '.mht', 1 ) == FALSE) return false;
- $o_mhtml = new mhtparse ();
- $o_mhtml->set_file ( $filename );
- $o_mhtml->extract ();
- return $o_mhtml->get_part_to_file(0);
- }
- return null;
- }
- function mhtmlParseAll($filename) {
- if (file_exists ( $filename )) {
- if (is_dir ( $filename )) return false;
- $filename = strtolower ( $filename );
- if (strpos ( $filename, '.mht', 1 ) == FALSE) return false;
- $o_mhtml = new mhtparse ();
- $o_mhtml->set_file ( $filename );
- $o_mhtml->extract ();
- return $o_mhtml->get_all_part_file();
- }
- return null;
- }
- */
/**
 * Minimal parser for MHT / MHTML (MIME "multipart/related") web archives.
 *
 * Typical use: set_file($path), then extract(), then get_part_to_file($i)
 * to obtain the decoded body of part $i (part 0 is normally the HTML page).
 *
 * The code deliberately sticks to old-style PHP (no type declarations) so
 * that it keeps running wherever the original listing did.
 */
class mhtparse
{
    /** Path of the archive to read; set through set_file(). */
    public $file = '';

    /** Part delimiter ("--" followed by the boundary value), or '' if none was found. */
    public $boundary = '';

    /** Raw file contents after read_filedata(); array of raw parts after file_parts(). */
    public $filedata = '';

    /** Number of parts found by file_parts() (includes the closing "--" marker part). */
    public $countparts = 1;

    /** Human-readable messages about problems met while reading the file. */
    public $log = '';

    /**
     * Reads the configured file and splits it into its MIME parts.
     *
     * @return int Always 1 (kept for backward compatibility); check get_log()
     *             to find out whether the file could actually be read.
     */
    public function extract()
    {
        $this->read_filedata();
        $this->file_parts();
        return 1;
    }

    /**
     * Sets the path of the archive to parse.
     *
     * @param string $p File path.
     */
    public function set_file($p)
    {
        $this->file = $p;
    }

    /**
     * Returns the accumulated log messages.
     *
     * @return string
     */
    public function get_log()
    {
        return $this->log;
    }

    /**
     * Locates the multipart boundary in the first 8192 bytes of the raw data
     * and splits $this->filedata into an array of raw parts.
     *
     * Both quoted (boundary="abc") and unquoted (boundary=abc) forms are
     * recognised, case-insensitively. When no boundary is found the whole
     * content becomes the single part at index 0.
     */
    public function file_parts()
    {
        if (!is_string($this->filedata)) {
            // Already split (or never loaded): nothing to do.
            return;
        }

        // Forget any boundary left over from a previously parsed file.
        $this->boundary = '';

        $head_lines = explode("\n", substr($this->filedata, 0, 8192));
        foreach ($head_lines as $line) {
            $line = trim($line);
            if ($line === '') {
                continue;
            }
            if (!preg_match('/boundary\s*=\s*(?:"([^"]*)"|([^\s;"]+))/i', $line, $m)) {
                continue;
            }
            // Group 2 is only present for the unquoted form.
            $value = (isset($m[2]) && $m[2] !== '') ? $m[2] : $m[1];
            if ($value === '') {
                continue;
            }
            $this->boundary = '--' . $value;
            // Drop the declaring header line from the data before splitting,
            // as the original implementation did.
            $this->filedata = str_replace($line, '', $this->filedata);
            break;
        }

        if ($this->boundary !== '') {
            $parts = explode($this->boundary, $this->filedata);
            // Element 0 is the top-level header block / preamble, not a part.
            unset($parts[0]);
            $this->filedata = array_values($parts);
            $this->countparts = count($this->filedata);
        } else {
            $this->filedata = array($this->filedata);
        }
    }

    /**
     * Returns every raw (still encoded, headers included) part.
     *
     * @return array|string Array of parts after extract(); the raw string before.
     */
    public function get_all_part_file()
    {
        return $this->filedata;
    }

    /**
     * Returns the decoded body of part $i.
     *
     * The part's header block is skipped up to the first blank line. The body
     * is then decoded according to its Content-Transfer-Encoding header
     * (base64 or quoted-printable, compared case-insensitively); any other
     * encoding is returned unchanged. Body line breaks are preserved so that
     * quoted-printable soft line breaks ("=" at end of line) decode correctly.
     *
     * @param int $i Zero-based part index.
     * @return string|int Decoded body; '' when the part does not exist;
     *                    1 when the part is the closing "--" boundary marker.
     */
    public function get_part_to_file($i)
    {
        if (!is_array($this->filedata) || !isset($this->filedata[$i])) {
            return '';
        }

        $part_lines = explode("\n", ltrim($this->filedata[$i]));
        $line_data_start = 0;
        $encoding = '';

        foreach ($part_lines as $line_id => $line) {
            $line = trim($line);
            if ($line === '') {
                // A part consisting only of "--" is the closing delimiter.
                if (trim($part_lines[0]) === '--') {
                    return 1;
                }
                $line_data_start = $line_id;
                break;
            }
            $pos = strpos($line, ':');
            if ($pos !== false) {
                $k = strtolower(trim(substr($line, 0, $pos)));
                if ($k === 'content-transfer-encoding') {
                    $encoding = strtolower(trim(substr($line, $pos + 1)));
                }
            }
        }

        // Everything after the header/body separator, newlines kept intact.
        $body = implode("\n", array_slice($part_lines, $line_data_start + 1));

        if ($encoding === 'base64') {
            // Non-strict decoding ignores the embedded line breaks.
            return base64_decode($body);
        }
        if ($encoding === 'quoted-printable') {
            return quoted_printable_decode($body);
        }
        return $body;
    }

    /**
     * Loads the whole file named by $this->file into $this->filedata.
     *
     * On failure $this->filedata is set to '' and a message is appended to
     * the log instead of passing an invalid handle on to fread().
     */
    public function read_filedata()
    {
        $this->filedata = '';

        if ($this->file === '' || !is_file($this->file) || !is_readable($this->file)) {
            $this->log .= 'Cannot read file: ' . $this->file . "\n";
            return;
        }

        $data = file_get_contents($this->file);
        if ($data === false) {
            $this->log .= 'Failed to read file: ' . $this->file . "\n";
            return;
        }
        $this->filedata = $data;
    }

    /**
     * Extracts the text between the first begin mark and its matching end
     * mark, honouring nested begin/end pairs.
     *
     * Example: for "sssss { x { xx } {xx{xx } x} x} sssss" with marks "{" and
     * "}", 'range' is " x { xx } {xx{xx } x} x".
     *
     * Original algorithm: v1.1 2004-2006, Uku-Kaarel Joesaar.
     *
     * @param string $subject       Text to search (by reference; not modified).
     * @param string $Beginmark_str Opening mark; an empty value falls back to '{'.
     * @param string $Endmark_str   Closing mark; an empty value falls back to '}'.
     * @param int    $Start_pos     Offset at which the search starts.
     * @return array|false array('range' => text between the marks,
     *                           'begin' => offset of the first character of the range,
     *                           'end'   => offset just past the closing mark),
     *                     or false when no complete range is found.
     */
    public function getrange(&$subject, $Beginmark_str = '{', $Endmark_str = '}', $Start_pos = 0)
    {
        if (empty($Beginmark_str)) {
            $Beginmark_str = '{';
        }
        if (empty($Endmark_str)) {
            $Endmark_str = '}';
        }
        $begin_len = strlen($Beginmark_str);
        $end_len = strlen($Endmark_str);

        // An out-of-range offset makes strpos() fail (it throws on PHP 8);
        // report it as "nothing found", as the suppressed call used to.
        $subject_len = strlen($subject);
        if ($Start_pos > $subject_len || $Start_pos < -$subject_len) {
            return false;
        }

        $range_begin = null;      // offset just past the first begin mark
        $range_length = null;     // length of the range found so far
        $range_end = 0;           // offset of the current candidate end mark
        $scan_from = $Start_pos;  // where the next begin-mark search starts
        $end_found = null;        // result of the last end-mark search
        $end_resume = 0;          // where the next end-mark search starts
        $closed = false;          // true once the range is balanced

        do {
            $hit = strpos($subject, $Beginmark_str, $scan_from);

            if ($hit === false) {
                if ($range_begin === null) {
                    // No begin mark at all.
                    return false;
                }
                // No further begin mark: the current end mark closes the range.
                $closed = true;
            } else {
                $scan_from = $hit + $begin_len;
                if ($range_begin === null) {
                    $range_begin = $scan_from;
                } elseif ($scan_from > $range_end) {
                    // The next begin mark lies beyond the candidate end: done.
                    $closed = true;
                }
                // Otherwise a nested begin mark sits inside the candidate
                // range, so one more end mark has to be consumed below.
            }

            if (!$closed) {
                if ($end_found === null) {
                    $end_resume = $range_begin;
                }
                $end_found = strpos($subject, $Endmark_str, $end_resume);
                if ($end_found === false) {
                    // Unbalanced: no closing mark left.
                    return false;
                }
                $range_length = $end_found - $range_begin;
                $end_resume = $end_found + $end_len;
                $range_end = $end_found;
            }
        } while (!$closed);

        if (!is_int($range_begin) || !is_int($range_length)) {
            return false;
        }

        return array(
            'range' => substr($subject, $range_begin, $range_length),
            'begin' => $range_begin,
            'end'   => $end_resume
        );
    }
}
- ?>