PHP 正则替换处理 HTML 页面的方法

本文通过一个完整的实例类,介绍 PHP 使用正则表达式替换处理 HTML 页面的方法,涉及对 HTML 页面中常见元素(img、a、iframe、frame 标签,以及内联 JS 的 window.open 和 CSS 的 background url)的匹配技巧,供有需要的读者参考。具体代码如下:

  1. <?php
  2. if(!defined('BASEPATH')) exit('No direct script access allowed');
  /**
   * Rewrites references to project files inside an HTML document so that they
   * point at URLs of the form
   *   {base_url}cdms/{module}/{action}/{id}?pid={projectid}
   *
   * Six kinds of reference are recognised, each by its own regular expression:
   *   1. <img src=...>
   *   2. <a href=...>
   *   3. <iframe src=...>
   *   4. <frame src=...>
   *   5. window.open('...') calls in inline JavaScript
   *   6. background ... url('...') declarations in inline CSS
   *
   * A reference is rewritten only when the loaded model returns a non-empty id
   * for its resolved path; every other match is left exactly as it was found.
   */
  class Myreplace {
      /** Module identifiers accepted by my_replace(). */
      private $moudle_array = array('udata','tdata','tresult','dresult');
      /** Model loaded (under the alias "model") for each module identifier. */
      private $model_names = array(
          'udata'   => 'mupload_data',
          'tdata'   => 'taskdata',
          'tresult' => 'taskresult',
          'dresult' => 'dmsresult',
      );
      /** Framework instance returned by get_instance(). */
      private $CI;
      private $content;
      private $relative_dirname;
      private $projectid;
      private $moudle;

      public function __construct() {
          $this->CI =& get_instance();
      }

      /**
       * Rewrite every recognised file reference in $content.
       *
       * @param string $content   HTML content to process
       * @param string $relative  directory of the document, used to resolve relative references
       * @param int    $projectid project id, passed to the model and appended as ?pid=
       * @param string $moudle    module identifier (case-insensitive): udata, tdata, tresult, dresult
       * @return string the content with resolvable references rewritten
       * @throws InvalidArgumentException when $moudle is not a known module identifier
       */
      public function my_replace($content,$relative,$projectid,$moudle) {
          // Normalise once so that validation, model selection and URL building agree.
          $moudle = strtolower((string) $moudle);
          if (!in_array($moudle, $this->moudle_array, true)) {
              throw new InvalidArgumentException('Unknown module identifier: ' . $moudle);
          }

          $this->content = $content;
          $this->relative_dirname = $relative;
          $this->projectid = $projectid;
          $this->moudle = $moudle;
          $this->CI->load->model($this->model_names[$moudle], 'model');

          // Pattern / callback pairs, applied in this order.
          $passes = array(
              array('/<img(.+?)src=([\'\" ])?(.+?)([ >]+?)/i', 'image_replace'),
              array('/<a(.+?)href=([\'\" ])?(.+?)([ >]+?)/i', 'html_replace'),
              array('/<iframe(.+?)src=([\'\" ])?(.+?)([ >]+?)/i', 'iframe_replace'),
              array('/<frame(.+?)src=([\'\" ])?(.+?)([ >]+?)/i', 'frame_replace'),
              array('/window\.open([( ]+?)([\'" ]+?)(.+?)([ )]+?)/i', 'js_replace'),
              array('/background(.+?)url([( ])([\'" ]+?)(.+?)([ )]+?)/i', 'css_replace'),
          );
          foreach ($passes as $pass) {
              $rewritten = preg_replace_callback($pass[0], array($this, $pass[1]), $content);
              // preg_replace_callback() returns null on a PCRE error; keep the
              // content from the previous pass rather than discarding the document.
              if ($rewritten !== null) {
                  $content = $rewritten;
              }
          }
          return $content;
      }

      /** Callback for <img src=...>: links to the image action. */
      private function image_replace($matches) {
          return $this->rewrite_tag($matches, 'img', 'src', true, false);
      }

      /** Callback for <a href=...>: links to the text-file action, keeping any #fragment. */
      private function html_replace( $matches ) {
          return $this->rewrite_tag($matches, 'a', 'href', false, true);
      }

      /** Callback for <iframe src=...>: links to the text-file action, keeping any #fragment. */
      private function iframe_replace( $matches ) {
          return $this->rewrite_tag($matches, 'iframe', 'src', false, true);
      }

      /** Callback for <frame src=...>: links to the text-file action, keeping any #fragment. */
      private function frame_replace( $matches ) {
          return $this->rewrite_tag($matches, 'frame', 'src', false, true);
      }

      /**
       * Callback for window.open(...): rewrites the first argument only.
       *
       * $matches[3] holds the argument text up to the first space or ")".
       * Everything from the first comma onward is copied through verbatim.
       */
      private function js_replace( $matches ){
          if (count($matches) < 5 || $matches[3] === '') return $matches[0];

          $comma = strpos($matches[3], ',');
          if ($comma === false) {
              $first = $matches[3];
              $rest = '';
          } else {
              $first = substr($matches[3], 0, $comma);
              $rest = substr($matches[3], $comma);
          }
          $href = rtrim($first, "'\"\\");

          // Targets carrying an anchor are deliberately left untouched.
          if (strpos($href, '#') !== false) return $matches[0];

          $txtfile_id = $this->lookup_id($href);
          if ($txtfile_id === null) return $matches[0];

          return "window.open".$matches[1].$matches[2]
              .$this->build_url(false, $txtfile_id)
              .$matches[2].$rest.$matches[4];
      }

      /** Callback for background ... url(...): links to the image action. */
      private function css_replace( $matches ) {
          if (count($matches) < 6 || $matches[4] === '') return $matches[0];

          $image_id = $this->lookup_id(rtrim($matches[4], "'\"/"));
          if ($image_id === null) return $matches[0];

          return "background".$matches[1]."url".$matches[2].$matches[3]
              .$this->build_url(true, $image_id)
              .$matches[3].$matches[5];
      }

      /**
       * Shared implementation of the four tag callbacks.
       *
       * @param array  $matches    groups: 1 = text between tag name and attribute,
       *                           2 = optional opening quote/space, 3 = attribute value
       *                           (including its closing quote), 4 = terminating space or ">"
       * @param string $tag        tag name to emit
       * @param string $attribute  attribute name to emit
       * @param bool   $is_image   true to link to the image action, false for the text-file action
       * @param bool   $has_anchor true to split off a trailing #fragment before the lookup
       * @return string the rewritten tag opening, or the original match when nothing resolves
       */
      private function rewrite_tag($matches, $tag, $attribute, $is_image, $has_anchor) {
          if (count($matches) < 5 || $matches[3] === '') return $matches[0];

          $target = rtrim($matches[3], "'\"/");
          $fragment = '';
          if ($has_anchor) {
              $hash = strrpos($target, '#');
              if ($hash !== false) {
                  $fragment = substr($target, $hash);
                  $target = substr($target, 0, $hash);
              }
          }

          $id = $this->lookup_id($target);
          // Unresolvable references are returned byte-for-byte so that external
          // links, pure anchors and trailing slashes survive.
          if ($id === null) return $matches[0];

          return "<".$tag.$matches[1].$attribute."=".$matches[2]
              .$this->build_url($is_image, $id).$fragment
              .$matches[2].$matches[4];
      }

      /**
       * Resolve a reference against the document directory and ask the model for its id.
       *
       * @param string $reference reference with quotes and fragment already removed
       * @return mixed the id returned by the model, or null when the reference is empty,
       *               carries a URI scheme / starts with "//", or the model returns an empty value
       */
      private function lookup_id($reference) {
          if ($reference === '') return null;
          // http:, https:, mailto:, javascript:, data: ... and protocol-relative
          // URLs cannot be project files, so skip the model call for them.
          if (preg_match('#^(?:[a-z][a-z0-9+.\-]*:|//)#i', $reference)) return null;

          // Climb one directory for every "../" in the reference.
          $dirname = (string) $this->relative_dirname;
          $levels = substr_count($reference, '../');
          for ($i = 0; $i < $levels; $i++) {
              $cut = strrpos($dirname, '/');
              $dirname = ($cut === false) ? '' : substr($dirname, 0, $cut);
          }

          $relativepath = rtrim($dirname, '/') . '/' . ltrim($reference, './');
          $id = $this->CI->model->get_id_by_path_and_project($relativepath, $this->projectid);
          return empty($id) ? null : $id;
      }

      /**
       * Build the replacement URL for a resolved id.
       *
       * The "dresult" module uses the readpic/readfile actions; every other
       * module uses picfile/txtfile.
       */
      private function build_url($is_image, $id) {
          if ($this->moudle === 'dresult') {
              $action = $is_image ? 'readpic' : 'readfile';
          } else {
              $action = $is_image ? 'picfile' : 'txtfile';
          }
          return $this->CI->config->item("base_url")."cdms/".$this->moudle."/".$action."/".$id."?pid=".$this->projectid;
      }
  }
  226. /* End of Myreplace.php */
  227. /* Location: /application/libraries/Myreplace.php */