php解析mht文件转换成html的实例

php解析mht文件:用编辑器打开mht文件,可以看到其中的内容是base64编码的,所以mht是可以转换成html的。

  1. <?php
  /**
   * Parser for MHT (MHTML web archive) files.
   *
   * An MHT file is a MIME multipart document. The parser reads the file, finds
   * the multipart boundary declared in the first 8 KiB, splits the content into
   * parts on that boundary and can decode a single part (base64 or
   * quoted-printable) on request.
   *
   * Usage example:
   *
   *     function mhtmlParseBody($filename) {
   *         if (!file_exists($filename)) {
   *             return null;
   *         }
   *         if (is_dir($filename)) {
   *             return false;
   *         }
   *         // Test the extension on a lowercased copy only; the real path must
   *         // keep its case or it cannot be opened on case-sensitive filesystems.
   *         if (strpos(strtolower($filename), '.mht', 1) === false) {
   *             return false;
   *         }
   *         $o_mhtml = new mhtparse();
   *         $o_mhtml->set_file($filename);
   *         $o_mhtml->extract();
   *         return $o_mhtml->get_part_to_file(0);
   *     }
   *
   *     function mhtmlParseAll($filename) {
   *         if (!file_exists($filename)) {
   *             return null;
   *         }
   *         if (is_dir($filename)) {
   *             return false;
   *         }
   *         if (strpos(strtolower($filename), '.mht', 1) === false) {
   *             return false;
   *         }
   *         $o_mhtml = new mhtparse();
   *         $o_mhtml->set_file($filename);
   *         $o_mhtml->extract();
   *         return $o_mhtml->get_all_part_file();
   *     }
   */
  class mhtparse
  {
      /** Path of the MHT file to parse (set through set_file()). */
      public $file = '';

      /** Part delimiter including its leading "--"; '' when no boundary was found. */
      public $boundary = '';

      /** Raw file contents after read_filedata(); array of raw part strings after file_parts(). */
      public $filedata = '';

      /** Number of entries in $filedata after file_parts(). */
      public $countparts = 1;

      /** Newline-separated diagnostic messages (currently: file read failures). */
      public $log = '';

      /**
       * Reads the configured file and splits it into parts.
       *
       * @return int 1 on success, 0 when the file could not be read (see get_log()).
       */
      public function extract()
      {
          if (!$this->read_filedata()) {
              return 0;
          }
          $this->file_parts();

          return 1;
      }

      /**
       * Sets the path of the MHT file to parse.
       *
       * @param string $p File path.
       */
      public function set_file($p)
      {
          $this->file = $p;
      }

      /**
       * Returns the diagnostic messages collected so far.
       *
       * @return string
       */
      public function get_log()
      {
          return $this->log;
      }

      /**
       * Locates the multipart boundary and splits $filedata into parts.
       *
       * Only the first 8192 bytes are searched for the boundary declaration.
       * When a boundary is found, everything before its first occurrence is
       * dropped and $filedata becomes a zero-based list of raw parts. The piece
       * after the closing delimiter (normally "--") is kept as the last entry.
       * Without a boundary, $filedata becomes a one-element array holding the
       * whole content.
       */
      public function file_parts()
      {
          if (!is_string($this->filedata)) {
              // Already split (or never loaded); nothing to do.
              return;
          }

          // Reset so a reused instance does not keep the previous file's boundary.
          $this->boundary = '';
          $head = (string) substr($this->filedata, 0, 8192);

          foreach (explode("\n", $head) as $line) {
              $line = trim($line);
              if (strpos($line, '=') === false || stripos($line, 'boundary') === false) {
                  continue;
              }
              $token = $this->boundary_from_line($line);
              if ($token === '') {
                  continue;
              }
              $this->boundary = '--' . $token;
              // Remove the declaration itself from the content before splitting.
              $this->filedata = str_replace($line, '', $this->filedata);
              break;
          }

          if ($this->boundary !== '') {
              $parts = explode($this->boundary, $this->filedata);
              // The first piece is the top-level header / preamble, not a part.
              array_shift($parts);
              $this->filedata = $parts;
              $this->countparts = count($parts);
          } else {
              $this->filedata = array($this->filedata);
              $this->countparts = 1;
          }
      }

      /**
       * Returns every raw (still encoded, headers included) part.
       *
       * @return array|string Array of parts after file_parts(); the raw string before.
       */
      public function get_all_part_file()
      {
          return $this->filedata;
      }

      /**
       * Returns the decoded body of part $i.
       *
       * Despite its name this method writes no file; it returns the content.
       * The part's header lines are read up to the first blank line and the
       * Content-Transfer-Encoding header selects the decoding:
       * base64, quoted-printable, or none (body returned as is).
       *
       * @param int $i Zero-based part index.
       *
       * @return string|int Decoded body; '' for a missing part; 1 when the part
       *                    is the closing-delimiter remainder ("--" followed by
       *                    a blank line).
       */
      public function get_part_to_file($i)
      {
          $part = '';
          if (is_array($this->filedata) && isset($this->filedata[$i])) {
              $part = (string) $this->filedata[$i];
          }

          $lines = explode("\n", ltrim($part));
          $encoding = '';
          // Without any blank separator line the first line is still treated
          // as a header line, so the body starts at index 1.
          $bodyStart = 1;

          foreach ($lines as $index => $raw) {
              $line = trim($raw);
              if ($line === '') {
                  if (trim($lines[0]) === '--') {
                      // Remainder of the closing delimiter "--boundary--".
                      return 1;
                  }
                  $bodyStart = $index + 1;
                  break;
              }
              $colon = strpos($line, ':');
              if ($colon === false) {
                  continue;
              }
              $name = strtolower(trim(substr($line, 0, $colon)));
              if ($name === 'content-transfer-encoding') {
                  // Encoding names are case-insensitive ("Base64", "BASE64", ...).
                  $encoding = strtolower(trim((string) substr($line, $colon + 1)));
              }
          }

          $body = array_slice($lines, $bodyStart);

          if ($encoding === 'base64') {
              // Line breaks carry no meaning in base64; join the lines directly.
              return base64_decode(implode('', $body));
          }

          // Keep the line breaks: they are content for plain bodies and are
          // required to recognise quoted-printable soft line breaks ("=\n").
          $text = implode("\n", $body);
          if ($encoding === 'quoted-printable') {
              return quoted_printable_decode($text);
          }

          return $text;
      }

      /**
       * Loads the whole file named by $file into $filedata.
       *
       * @return bool True on success; false (with a message appended to $log
       *              and $filedata set to '') when the file cannot be read.
       */
      public function read_filedata()
      {
          $this->filedata = '';

          if (!is_file($this->file) || !is_readable($this->file)) {
              $this->log .= "Cannot read file: {$this->file}\n";
              return false;
          }

          $contents = file_get_contents($this->file);
          if ($contents === false) {
              $this->log .= "Failed reading file: {$this->file}\n";
              return false;
          }

          $this->filedata = $contents;

          return true;
      }

      /**
       * Extracts the text enclosed by a begin mark and its matching end mark.
       *
       * Nested begin marks are honoured: every further begin mark that ends at
       * or before the current end mark pushes the end to the next end mark.
       * When both marks are the same string (e.g. '"') the result is the text
       * between the first two occurrences.
       *
       * Example:
       *     $str = "sssss { x { xx } {xx{xx } x} x} sssss";
       *     $range = $this->getrange($str, '{', '}', 0);
       *     $range['range']  // " x { xx } {xx{xx } x} x"
       *     $range['begin']  // 7  (offset just after the first begin mark)
       *     $range['end']    // 31 (offset just after the matching end mark)
       *
       * Original algorithm: v1.1 2004-2006, Uku-Kaarel Joesaar,
       * http://www.hot.ee/ukjoesaar
       *
       * @param string $subject       Text to search (taken by reference, not modified).
       * @param string $Beginmark_str Begin mark; an empty value falls back to '{'.
       * @param string $Endmark_str   End mark; an empty value falls back to '}'.
       * @param int    $Start_pos     Offset at which the search starts.
       *
       * @return array|false array('range' => string, 'begin' => int, 'end' => int),
       *                     or false when no begin mark or no matching end mark exists.
       */
      public function getrange(&$subject, $Beginmark_str = '{', $Endmark_str = '}', $Start_pos = 0)
      {
          if (empty($Beginmark_str)) {
              $Beginmark_str = '{';
          }
          if (empty($Endmark_str)) {
              $Endmark_str = '}';
          }

          $text = (string) $subject;
          $textLen = strlen($text);
          $beginLen = strlen($Beginmark_str);
          $endLen = strlen($Endmark_str);
          $Start_pos = (int) $Start_pos;

          // An offset outside the string makes strpos() fail hard on PHP 8.
          if ($Start_pos > $textLen || $Start_pos < -$textLen) {
              return false;
          }

          $firstBegin = strpos($text, $Beginmark_str, $Start_pos);
          if ($firstBegin === false) {
              return false;
          }
          $rangeBegin = $firstBegin + $beginLen;

          $endPos = strpos($text, $Endmark_str, $rangeBegin);
          if ($endPos === false) {
              return false;
          }

          // Walk over further begin marks; each one lying inside the current
          // range is a nested opening and requires one more end mark.
          $scanPos = $rangeBegin;
          while (true) {
              $nextBegin = strpos($text, $Beginmark_str, $scanPos);
              if ($nextBegin === false) {
                  break;
              }
              $scanPos = $nextBegin + $beginLen;
              if ($scanPos > $endPos) {
                  break;
              }
              $endPos = strpos($text, $Endmark_str, $endPos + $endLen);
              if ($endPos === false) {
                  // Unbalanced marks.
                  return false;
              }
          }

          return array(
              'range' => (string) substr($text, $rangeBegin, $endPos - $rangeBegin),
              'begin' => $rangeBegin,
              'end'   => $endPos + $endLen,
          );
      }

      /**
       * Extracts the boundary token from a header line containing "boundary=".
       *
       * Reads the value that follows the "boundary" parameter itself, so other
       * quoted parameters on the same line (e.g. type="text/html") are ignored.
       * Both quoted and unquoted values are supported.
       *
       * @param string $line Trimmed header line.
       *
       * @return string The boundary token without the leading "--"; '' when none.
       */
      private function boundary_from_line($line)
      {
          $namePos = stripos($line, 'boundary');
          if ($namePos === false) {
              return '';
          }
          $eqPos = strpos($line, '=', $namePos);
          if ($eqPos === false) {
              return '';
          }

          $value = ltrim((string) substr($line, $eqPos + 1));
          if ($value === '') {
              return '';
          }

          if ($value[0] === '"') {
              $quoted = $this->getrange($value, '"', '"', 0);
              return $quoted === false ? '' : $quoted['range'];
          }

          // Unquoted value: ends at the next parameter separator or whitespace.
          return (string) substr($value, 0, strcspn($value, "; \t"));
      }
  }
  186. ?>