<?php
/**
 * Baidu paid-ad scraper.
 *
 * For every keyword in a fixed list this script downloads the Baidu result
 * page, cuts it into the two "content_" sections, extracts the paid-ad blocks
 * from each section, and then, for every ad block, echoes the display-URL
 * element as HTML and appends its tag-stripped text to semallurl.txt.
 */

$logPath = "semallurl.txt";
$fp = fopen($logPath, "a+");
if ($fp === false) {
    // Without a log handle every fwrite() below would fail, so stop here.
    exit("Unable to open " . $logPath . " for appending\n");
}

// NOTE(review): "广州酒店" is listed twice, so it is fetched and logged twice.
// Looks like a typo for another city - confirm before changing the list.
$kws1 = "上海酒店,北京酒店,广州酒店,天津酒店,广州酒店";
$kws = explode(",", $kws1);

// Marker that precedes each page section; explode() removes it, so it is
// prepended again to the pieces that are kept.
$sectionMarker = '<div id="content_';

foreach ($kws as $kw) {
    $pageURL = "http://www.baidu.com/s?word=" . urlencode($kw);

    // Download the result page.
    $contents = fetch($pageURL);
    if (!is_string($contents)) {
        // A failed download is treated as an empty page so the start/end
        // markers for this keyword are still written.
        error_log("Baidu fetch failed for " . $pageURL);
        $contents = "";
    }

    // Remove JavaScript. The "s" modifier lets "." span newlines so that
    // multi-line script blocks are removed too; "i" covers <SCRIPT>.
    $withoutScripts = preg_replace('/<script[^>]*?>.*?<\/script>/si', "", $contents);
    if ($withoutScripts !== null) {
        $contents = $withoutScripts;
    }

    // Piece 1 is the right-hand part of the result page, piece 2 the left-hand
    // part. Either may be missing when the page layout differs.
    $sections = explode($sectionMarker, $contents);
    $contents_right = $sectionMarker . (isset($sections[1]) ? $sections[1] : "");
    $contents_left = $sectionMarker . (isset($sections[2]) ? $sections[2] : "");

    $ads_left_white = array();
    $ads_left_green = array();
    $ads_right = array();
    preg_match_all(
        '/(<div id="[0-9]*" class="ec_pp_f ec_pp_top.*?)<a href="http:\/\/e\.baidu\.com\//',
        $contents_left,
        $ads_left_white
    );
    preg_match_all(
        '/(<table class="EC_mr15 EC_ppim_top ec_pp_f.*?<\/table>)/',
        $contents_left,
        $ads_left_green
    );
    preg_match_all(
        '/<div id="bdfs[^>]*class="EC_im EC_fr EC_PP EC_idea1017 ">.*?<a class="EC_BL EC_desc".*?<\/a>/',
        $contents_right,
        $ads_right
    );

    echo "------------Keywords ads for" . $kw . "start ------------------------------------<br>";
    fwrite($fp, "----------" . $kw . " ads start------------------------- \r\n");

    echo "left ads with green background is<br>";
    sem_report_ad_urls(
        $fp,
        isset($ads_left_green[0]) ? $ads_left_green[0] : array(),
        '/<span>.*?<\/span>/'
    );

    echo "<p>-------------<br>";
    echo "left ads with white background is<br>";
    sem_report_ad_urls(
        $fp,
        isset($ads_left_white[0]) ? $ads_left_white[0] : array(),
        '/<span class="ec_url">.*?<\/span>/'
    );

    echo "<p>-------------<br>";
    echo "right ads with is<br>";
    sem_report_ad_urls(
        $fp,
        isset($ads_right[0]) ? $ads_right[0] : array(),
        '/(<font size="-1" class="EC_url">.*?<\/font>)/'
    );

    echo "---------------Keywords ads for" . $kw . "END ------------------------------------<br>";
    fwrite($fp, "----------" . $kw . " ads End------------------------- \r\n");
}

fwrite($fp, date("Y-m-d H:i:s") . " PHP代码自动运行!\r\n");
fclose($fp);

/**
 * Echoes and logs the display-URL element of each ad block.
 *
 * @param resource $fp         Open, writable handle of the log file.
 * @param array    $adBlocks   HTML fragments, one per ad.
 * @param string   $urlPattern PCRE pattern that locates the display-URL
 *                             element inside a fragment.
 * @return void
 */
function sem_report_ad_urls($fp, array $adBlocks, $urlPattern)
{
    foreach ($adBlocks as $adHtml) {
        // A fragment without the element still produces one (empty) entry, so
        // the log keeps exactly one line per ad block.
        $urlHtml = preg_match($urlPattern, $adHtml, $found) === 1 ? $found[0] : "";
        fwrite($fp, strip_tags($urlHtml) . "\r\n");
        echo $urlHtml . "<br>";
    }
}
/**
 * Downloads a URL with cURL and returns the response body.
 *
 * @param string $Date URL to request (despite its name, this is not a date).
 * @return string|false The response body, or false when the cURL handle
 *                      cannot be created or the transfer fails.
 */
function fetch($Date) {
    $ch = curl_init();
    if ($ch === false) {
        // No handle means nothing below can work; report failure the same way
        // a failed transfer does.
        return false;
    }
    $timeout = 5;
    curl_setopt($ch, CURLOPT_URL, "$Date");
    // Return the body from curl_exec() instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)");
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    // The connect timeout alone does not bound a transfer that stalls after
    // the connection is established, so cap the whole request as well.
    curl_setopt($ch, CURLOPT_TIMEOUT, $timeout * 6);
    $contents = curl_exec($ch);
    if ($contents === false) {
        error_log("fetch: " . curl_error($ch) . " (" . $Date . ")");
    }
    curl_close($ch);
    return $contents;
}
?>
//该片段来自于http://www.codesnippet.cn/detail/051220137755.html
来源: http://www.codesnippet.cn/detail/051220137755.html