<?php
/**
 * Scrapes post links and titles from the cnblogs.com listing pages into blogs.txt.
 *
 * @authors HG (hg0728@qq.com)
 * @date    2015-05-22 17:00:48
 * @version 1.0
 */
// Declare the script's output as UTF-8 encoded HTML.
header("Content-type:text/html;charset=utf-8");
/**
 * Fetches the body of a URL with a GET request via cURL.
 *
 * @param string $url Address to request.
 *
 * @return string|false Response body, or false when the cURL handle cannot
 *                      be created or the transfer fails.
 */
function getCurl($url) {
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    // Hand the body back from curl_exec() instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    // Keep certificate and host-name verification on so HTTPS responses
    // cannot be silently tampered with in transit.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
    // Bound the connect and total transfer time so one stalled server
    // cannot hang the caller indefinitely.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);

    $result = curl_exec($ch);
    curl_close($ch);

    return $result;
}
/**
 * Extracts post links and titles from listing HTML and appends them to a file.
 *
 * Every `<h3><a class="titlelnk" href="..." target="_blank">...</a></h3>`
 * anchor found in $str produces one line of the form "<href> <title>".
 * Each href is also echoed, with no separator, in the order found.
 *
 * @param string $str  HTML to scan.
 * @param string $file File the lines are appended to (defaults to blogs.txt
 *                     in the current working directory).
 *
 * @return void
 */
function preg_list($str, $file = 'blogs.txt') {
    $regex = '/<h3><a class="titlelnk" href="(.*?)" target="_blank">(.*?)<\/a><\/h3>/';

    // preg_match_all() yields 0 for no matches and false on a PCRE error;
    // either way there is nothing to record.
    if (!preg_match_all($regex, $str, $matches, PREG_SET_ORDER)) {
        return;
    }

    $lines = '';
    foreach ($matches as $match) {
        echo $match[1];
        // A real newline terminates each record, one post per line.
        $lines .= $match[1] . ' ' . $match[2] . "\n";
    }

    // Single append per page instead of reopening the file for every match.
    file_put_contents($file, $lines, FILE_APPEND);
}
// Crawl the site root followed by the paginated listing pages 1..200.
for ($i = 0; $i < 201; $i++) {
    // Index 0 is the home page; every other index is a numbered listing page.
    $url = ($i === 0)
        ? 'http://www.cnblogs.com/'
        : 'http://www.cnblogs.com/sitehome/p/' . $i;

    $str = getCurl($url);
    if (!is_string($str) || $str === '') {
        // The fetch failed or returned nothing: skip this page rather than
        // handing a non-string to the parser.
        continue;
    }

    preg_list($str);
}
// This snippet is from http://www.codesnippet.cn/detail/0206201512748.html
// Source: http://www.codesnippet.cn/detail/0206201512748.html