PHP采集程序中常用的函数

函数描述及例子 PHP采集程序中常用的函数 查询关键字 PHP采集程序中常用的函数

  /**
   * Return the URL (path plus query string) of the current request.
   *
   * Uses $_SERVER["REQUEST_URI"] when it is non-empty. Otherwise falls back
   * to $_SERVER["PHP_SELF"], appending "?" and $_SERVER["QUERY_STRING"] when
   * the query string is non-empty.
   *
   * @return string
   */
  function get_php_url(){
      if (!empty($_SERVER["REQUEST_URI"])) {
          return $_SERVER["REQUEST_URI"];
      }

      // empty() also covers the key being absent (e.g. some CGI/CLI setups).
      $scriptName = isset($_SERVER["PHP_SELF"]) ? $_SERVER["PHP_SELF"] : "";
      if (empty($_SERVER["QUERY_STRING"])) {
          return $scriptName;
      }
      return $scriptName."?".$_SERVER["QUERY_STRING"];
  }
  /**
   * Convert full-width digits to ASCII digits and reduce the value to a
   * bare number string.
   *
   * After the digit conversion, every character other than 0-9 and "." is
   * removed, as is a run of zeros at the very start of the string.
   *
   * @param string $fnum Text that may contain full-width digits.
   * @return string|int The cleaned digit string, or integer 0 when nothing
   *                    is left after cleaning.
   */
  function GetAlabNum($fnum){
      $fullWidth = array("０","１","２","３","４","５","６","７","８","９");
      $halfWidth = array("0","1","2","3","4","5","6","7","8","9");
      $fnum = str_replace($fullWidth, $halfWidth, $fnum);

      // PCRE replacement for the ereg_replace() call (ereg_* was removed in PHP 7).
      $fnum = preg_replace('/[^0-9.]|^0+/', '', $fnum);
      if ($fnum == "") {
          $fnum = 0;
      }
      return $fnum;
  }
  /**
   * Convert plain text to HTML-safe text.
   *
   * Escapes "<" and ">" as entities and turns each run of CR/LF characters
   * into a single "<br/>" followed by CRLF.
   *
   * @param string $txt Plain text.
   * @return string Text suitable for embedding in HTML.
   */
  function Text2Html($txt){
      // NOTE(review): the listing showed a no-op space-for-space replacement here;
      // mapping two spaces to one ideographic space (U+3000) is a reconstruction
      // of the presumed original - confirm before relying on it.
      $txt = str_replace("  ", "\xE3\x80\x80", $txt);
      $txt = str_replace(array("<", ">"), array("&lt;", "&gt;"), $txt);

      // Greedy match so that "\r\n" yields one line break, not one per character.
      return preg_replace("/[\r\n]+/", "<br/>\r\n", $txt);
  }
  /**
   * Neutralise HTML markup by escaping angle brackets.
   *
   * "<" becomes "&lt;" and ">" becomes "&gt;"; all other characters
   * (including "&" and quotes) are left untouched.
   *
   * @param string $str Text that may contain HTML tags.
   * @return string The text with angle brackets escaped.
   */
  function ClearHtml($str){
      return str_replace(array('<', '>'), array('&lt;', '&gt;'), $str);
  }
  /**
   * Rewrite root-relative href/src attributes in $content to absolute URLs.
   *
   * The scheme ("http://", "https://" or "ftp://") and the host are taken from
   * $feed_url. Only attribute values that start with a single "/" are
   * rewritten; protocol-relative values ("//host/...") are left alone. Both
   * double- and single-quoted attributes are handled.
   *
   * @param string $content  HTML fragment to rewrite.
   * @param string $feed_url URL the fragment was fetched from.
   * @return string The rewritten HTML, or $content unchanged when no supported
   *                scheme or no host can be extracted from $feed_url.
   */
  function relative_to_absolute($content, $feed_url) {
      preg_match('/(http|https|ftp):\/\//', $feed_url, $protocol);

      // Strip the scheme, then everything from the first "/" on, leaving the host.
      $server_url = preg_replace('/(http|https|ftp|news):\/\//', '', $feed_url);
      $server_url = preg_replace('/\/.*/', '', $server_url);

      if ((string) $server_url === '' || !isset($protocol[0])) {
          return $content;
      }

      $base = $protocol[0].$server_url;

      // A callback keeps "$" or "\" in the base from being read as backreferences.
      // (?!\/) skips protocol-relative URLs such as src="//cdn.example.com/x.js".
      return preg_replace_callback(
          '/(href|src)=(["\'])\/(?!\/)/i',
          function ($m) use ($base) {
              return $m[1].'='.$m[2].$base.'/';
          },
          $content
      );
  }
  /**
   * Collect every anchor's href and link text from an HTML string.
   *
   * Only anchors whose first attribute is href are matched, and the link text
   * must not contain ">" (so anchors wrapping nested tags are skipped).
   *
   * @param string $code HTML source.
   * @return array Array with two parallel lists: 'name' (link texts) and
   *               'url' (href values).
   */
  function get_all_url($code){
      $anchorPattern = '/<a\s+href=["|\']?([^>"\' ]+)["|\']?\s*[^>]*>([^>]+)<\/a>/i';
      preg_match_all($anchorPattern, $code, $found);

      $result = array();
      $result['name'] = $found[2];
      $result['url'] = $found[1];
      return $result;
  }
  /**
   * Return the text found between a start marker and an end marker.
   *
   * The text is taken from after the first occurrence of $start up to the
   * next occurrence of either $start or $end, whichever comes first.
   *
   * @param string $str   Text to search.
   * @param string $start Opening marker.
   * @param string $end   Closing marker.
   * @return string|null The enclosed text; '' when $start does not occur in
   *                     $str; null when either marker is empty.
   */
  function get_tag_data($str, $start, $end){
      if ((string) $start === '' || (string) $end === '') {
          return;
      }

      $afterStart = explode($start, $str);
      if (!isset($afterStart[1])) {
          // Start marker not present: nothing to extract.
          return '';
      }

      $beforeEnd = explode($end, $afterStart[1]);
      return $beforeEnd[0];
  }
  /**
   * Convert each row of an HTML table into one CSV-style line.
   *
   * Every <td> cell becomes a double-quoted field; fields are joined by
   * commas. All other tags and all whitespace (including spaces inside cell
   * text) are removed. Text after the last </tr> is discarded.
   *
   * @param string $table HTML containing <tr>/<td> markup.
   * @return array List of strings, one per table row.
   */
  function get_tr_array($table) {
      // <td ...> opens a quoted field, </td> closes it and adds the separator.
      $table = preg_replace("'<td[^>]*?>'si", '"', $table);
      $table = str_ireplace('</td>', '",', $table);
      $table = str_ireplace('</tr>', '{tr}', $table);

      // Strip every remaining HTML tag.
      $table = preg_replace("'<[\/\!]*?[^<>]*?>'si", "", $table);

      // Strip whitespace, including bare newlines and tabs.
      $table = preg_replace('/\s+/', '', $table);
      // NOTE(review): the listing had two identical space removals; one is
      // presumed to have been "&nbsp;" before the page was scraped - confirm.
      $table = str_replace('&nbsp;', '', $table);

      $rows = explode(',{tr}', $table);
      // The last piece is whatever followed the final </tr>, not a row.
      array_pop($rows);
      return $rows;
  }
  /**
   * Convert an HTML table into a two-dimensional array of cell texts.
   *
   * Each </tr> ends a row and each </td> ends a cell. All other tags and all
   * whitespace (including spaces inside cell text) are removed. Text after
   * the last </tr> is discarded.
   *
   * @param string $table HTML containing <tr>/<td> markup.
   * @return array List of rows, each a list of cell strings; an empty array
   *               when no row is found.
   */
  function get_td_array($table) {
      // Mark row and cell ends before the tags themselves are stripped.
      $table = str_ireplace('</tr>', '{tr}', $table);
      $table = str_ireplace('</td>', '{td}', $table);

      // Strip every remaining HTML tag (including <table>, <tr>, <td>).
      $table = preg_replace("'<[\/\!]*?[^<>]*?>'si", "", $table);

      // Strip whitespace, including bare newlines and tabs.
      $table = preg_replace('/\s+/', '', $table);
      // NOTE(review): the listing had two identical space removals; one is
      // presumed to have been "&nbsp;" before the page was scraped - confirm.
      $table = str_replace('&nbsp;', '', $table);

      $rows = explode('{tr}', $table);
      // The last piece is whatever followed the final </tr>, not a row.
      array_pop($rows);

      $td_array = array();
      foreach ($rows as $tr) {
          $cells = explode('{td}', $tr);
          // Drop the empty piece after the row's last </td>.
          array_pop($cells);
          $td_array[] = $cells;
      }
      return $td_array;
  }
  /**
   * Extract the runs of ASCII letters from a string, sorted.
   *
   * @param string $str      Text to scan.
   * @param bool   $distinct When truthy (the default), duplicates are removed.
   * @return array Sorted, re-indexed list of the words found.
   */
  function split_en_str($str,$distinct=true) {
      preg_match_all('/([a-zA-Z]+)/', $str, $found);
      $words = $found[1];

      if ($distinct) {
          $words = array_unique($words);
      }

      sort($words);
      return $words;
  }
  113. 函数描述及例子
  114. PHP采集程序中常用的函数
  115. 查询关键字
  116. PHP采集程序中常用的函数
  117. <?php
  /**
   * Return the URL (path plus query string) of the current request.
   *
   * Uses $_SERVER["REQUEST_URI"] when it is non-empty. Otherwise falls back
   * to $_SERVER["PHP_SELF"], appending "?" and $_SERVER["QUERY_STRING"] when
   * the query string is non-empty.
   *
   * @return string
   */
  function get_php_url(){
      if (!empty($_SERVER["REQUEST_URI"])) {
          return $_SERVER["REQUEST_URI"];
      }

      // empty() also covers the key being absent (e.g. some CGI/CLI setups).
      $scriptName = isset($_SERVER["PHP_SELF"]) ? $_SERVER["PHP_SELF"] : "";
      if (empty($_SERVER["QUERY_STRING"])) {
          return $scriptName;
      }
      return $scriptName."?".$_SERVER["QUERY_STRING"];
  }
  /**
   * Convert full-width digits to ASCII digits and reduce the value to a
   * bare number string.
   *
   * After the digit conversion, every character other than 0-9 and "." is
   * removed, as is a run of zeros at the very start of the string.
   *
   * @param string $fnum Text that may contain full-width digits.
   * @return string|int The cleaned digit string, or integer 0 when nothing
   *                    is left after cleaning.
   */
  function GetAlabNum($fnum){
      $fullWidth = array("０","１","２","３","４","５","６","７","８","９");
      $halfWidth = array("0","1","2","3","4","5","6","7","8","9");
      $fnum = str_replace($fullWidth, $halfWidth, $fnum);

      // PCRE replacement for the ereg_replace() call (ereg_* was removed in PHP 7).
      $fnum = preg_replace('/[^0-9.]|^0+/', '', $fnum);
      if ($fnum == "") {
          $fnum = 0;
      }
      return $fnum;
  }
  /**
   * Convert plain text to HTML-safe text.
   *
   * Escapes "<" and ">" as entities and turns each run of CR/LF characters
   * into a single "<br/>" followed by CRLF.
   *
   * @param string $txt Plain text.
   * @return string Text suitable for embedding in HTML.
   */
  function Text2Html($txt){
      // NOTE(review): the listing showed a no-op space-for-space replacement here;
      // mapping two spaces to one ideographic space (U+3000) is a reconstruction
      // of the presumed original - confirm before relying on it.
      $txt = str_replace("  ", "\xE3\x80\x80", $txt);
      $txt = str_replace(array("<", ">"), array("&lt;", "&gt;"), $txt);

      // Greedy match so that "\r\n" yields one line break, not one per character.
      return preg_replace("/[\r\n]+/", "<br/>\r\n", $txt);
  }
  /**
   * Neutralise HTML markup by escaping angle brackets.
   *
   * "<" becomes "&lt;" and ">" becomes "&gt;"; all other characters
   * (including "&" and quotes) are left untouched.
   *
   * @param string $str Text that may contain HTML tags.
   * @return string The text with angle brackets escaped.
   */
  function ClearHtml($str){
      return str_replace(array('<', '>'), array('&lt;', '&gt;'), $str);
  }
  /**
   * Rewrite root-relative href/src attributes in $content to absolute URLs.
   *
   * The scheme ("http://", "https://" or "ftp://") and the host are taken from
   * $feed_url. Only attribute values that start with a single "/" are
   * rewritten; protocol-relative values ("//host/...") are left alone. Both
   * double- and single-quoted attributes are handled.
   *
   * @param string $content  HTML fragment to rewrite.
   * @param string $feed_url URL the fragment was fetched from.
   * @return string The rewritten HTML, or $content unchanged when no supported
   *                scheme or no host can be extracted from $feed_url.
   */
  function relative_to_absolute($content, $feed_url) {
      preg_match('/(http|https|ftp):\/\//', $feed_url, $protocol);

      // Strip the scheme, then everything from the first "/" on, leaving the host.
      $server_url = preg_replace('/(http|https|ftp|news):\/\//', '', $feed_url);
      $server_url = preg_replace('/\/.*/', '', $server_url);

      if ((string) $server_url === '' || !isset($protocol[0])) {
          return $content;
      }

      $base = $protocol[0].$server_url;

      // A callback keeps "$" or "\" in the base from being read as backreferences.
      // (?!\/) skips protocol-relative URLs such as src="//cdn.example.com/x.js".
      return preg_replace_callback(
          '/(href|src)=(["\'])\/(?!\/)/i',
          function ($m) use ($base) {
              return $m[1].'='.$m[2].$base.'/';
          },
          $content
      );
  }
  /**
   * Collect every anchor's href and link text from an HTML string.
   *
   * Only anchors whose first attribute is href are matched, and the link text
   * must not contain ">" (so anchors wrapping nested tags are skipped).
   *
   * @param string $code HTML source.
   * @return array Array with two parallel lists: 'name' (link texts) and
   *               'url' (href values).
   */
  function get_all_url($code){
      $anchorPattern = '/<a\s+href=["|\']?([^>"\' ]+)["|\']?\s*[^>]*>([^>]+)<\/a>/i';
      preg_match_all($anchorPattern, $code, $found);

      $result = array();
      $result['name'] = $found[2];
      $result['url'] = $found[1];
      return $result;
  }
  /**
   * Return the text found between a start marker and an end marker.
   *
   * The text is taken from after the first occurrence of $start up to the
   * next occurrence of either $start or $end, whichever comes first.
   *
   * @param string $str   Text to search.
   * @param string $start Opening marker.
   * @param string $end   Closing marker.
   * @return string|null The enclosed text; '' when $start does not occur in
   *                     $str; null when either marker is empty.
   */
  function get_tag_data($str, $start, $end){
      if ((string) $start === '' || (string) $end === '') {
          return;
      }

      $afterStart = explode($start, $str);
      if (!isset($afterStart[1])) {
          // Start marker not present: nothing to extract.
          return '';
      }

      $beforeEnd = explode($end, $afterStart[1]);
      return $beforeEnd[0];
  }
  /**
   * Convert each row of an HTML table into one CSV-style line.
   *
   * Every <td> cell becomes a double-quoted field; fields are joined by
   * commas. All other tags and all whitespace (including spaces inside cell
   * text) are removed. Text after the last </tr> is discarded.
   *
   * @param string $table HTML containing <tr>/<td> markup.
   * @return array List of strings, one per table row.
   */
  function get_tr_array($table) {
      // <td ...> opens a quoted field, </td> closes it and adds the separator.
      $table = preg_replace("'<td[^>]*?>'si", '"', $table);
      $table = str_ireplace('</td>', '",', $table);
      $table = str_ireplace('</tr>', '{tr}', $table);

      // Strip every remaining HTML tag.
      $table = preg_replace("'<[\/\!]*?[^<>]*?>'si", "", $table);

      // Strip whitespace, including bare newlines and tabs.
      $table = preg_replace('/\s+/', '', $table);
      // NOTE(review): the listing had two identical space removals; one is
      // presumed to have been "&nbsp;" before the page was scraped - confirm.
      $table = str_replace('&nbsp;', '', $table);

      $rows = explode(',{tr}', $table);
      // The last piece is whatever followed the final </tr>, not a row.
      array_pop($rows);
      return $rows;
  }
  /**
   * Convert an HTML table into a two-dimensional array of cell texts.
   *
   * Each </tr> ends a row and each </td> ends a cell. All other tags and all
   * whitespace (including spaces inside cell text) are removed. Text after
   * the last </tr> is discarded.
   *
   * @param string $table HTML containing <tr>/<td> markup.
   * @return array List of rows, each a list of cell strings; an empty array
   *               when no row is found.
   */
  function get_td_array($table) {
      // Mark row and cell ends before the tags themselves are stripped.
      $table = str_ireplace('</tr>', '{tr}', $table);
      $table = str_ireplace('</td>', '{td}', $table);

      // Strip every remaining HTML tag (including <table>, <tr>, <td>).
      $table = preg_replace("'<[\/\!]*?[^<>]*?>'si", "", $table);

      // Strip whitespace, including bare newlines and tabs.
      $table = preg_replace('/\s+/', '', $table);
      // NOTE(review): the listing had two identical space removals; one is
      // presumed to have been "&nbsp;" before the page was scraped - confirm.
      $table = str_replace('&nbsp;', '', $table);

      $rows = explode('{tr}', $table);
      // The last piece is whatever followed the final </tr>, not a row.
      array_pop($rows);

      $td_array = array();
      foreach ($rows as $tr) {
          $cells = explode('{td}', $tr);
          // Drop the empty piece after the row's last </td>.
          array_pop($cells);
          $td_array[] = $cells;
      }
      return $td_array;
  }
  /**
   * Extract the runs of ASCII letters from a string, sorted.
   *
   * @param string $str      Text to scan.
   * @param bool   $distinct When truthy (the default), duplicates are removed.
   * @return array Sorted, re-indexed list of the words found.
   */
  function split_en_str($str,$distinct=true) {
      preg_match_all('/([a-zA-Z]+)/', $str, $found);
      $words = $found[1];

      if ($distinct) {
          $words = array_unique($words);
      }

      sort($words);
      return $words;
  }
  230. ?>