curl 是利用 URL 语法在命令行方式下工作的开源文件传输工具
本文在 PHP 中实现了一个 curl 批处理的实例。
代码如下:
- header("Content-Type:text/html;charset=utf8");
- /* Stage 1: fetch two start pages and collect every <a> tag they contain. */
- // Create the two easy handles, one per start page.
- // (The block comment above must end with a plain star-slash; a mangled
- // terminator would leave it open and comment out the whole first stage.)
- $ch1 = curl_init();
- $ch2 = curl_init();
- // CURLOPT_HEADER => 0 leaves response headers out of the captured output;
- // CURLOPT_RETURNTRANSFER => 1 makes cURL keep the body as a string so it
- // can be read later with curl_multi_getcontent().
- curl_setopt_array($ch1, array(
-     CURLOPT_URL => 'http://www.sina.com.cn',
-     CURLOPT_HEADER => 0,
-     CURLOPT_RETURNTRANSFER => 1,
- ));
- curl_setopt_array($ch2, array(
-     CURLOPT_URL => 'http://www.baidu.com/',
-     CURLOPT_HEADER => 0,
-     CURLOPT_RETURNTRANSFER => 1,
- ));
- // Create the multi handle and attach both easy handles to it.
- $mh = curl_multi_init();
- curl_multi_add_handle($mh, $ch1);
- curl_multi_add_handle($mh, $ch2);
- // Number of transfers still running; curl_multi_exec() writes it by reference.
- $state = null;
- // Drive the transfers until none is running or cURL reports an error.
- // Every curl_multi_exec() status is kept in $mc so that a failure is never
- // silently discarded.
- do {
-     $mc = curl_multi_exec($mh, $state);
-     if ($mc == CURLM_OK && $state) {
-         // Block until there is activity on any connection. On some
-         // platforms curl_multi_select() returns -1 immediately; sleep
-         // briefly in that case so the loop does not spin at full CPU.
-         if (curl_multi_select($mh) == -1) {
-             usleep(1000);
-         }
-     }
- } while ($state && ($mc == CURLM_OK || $mc == CURLM_CALL_MULTI_PERFORM));
- // Concatenate the bodies of both start pages.
- $text = curl_multi_getcontent($ch1);
- $text .= curl_multi_getcontent($ch2);
- // Find every <a> tag in the combined markup. Capture group 1 is the quoted
- // href value, because the next stage feeds $matches[1] to CURLOPT_URL.
- // Flags: i = case-insensitive tag/attribute names, s = link text may span lines.
- $matches = null;
- preg_match_all('/<a\s[^>]*?href\s*=\s*["\']([^"\']+)["\'][^>]*>.*?<\/a>/is', $text, $matches);
- // Detach the easy handles and release the multi handle.
- curl_multi_remove_handle($mh, $ch1);
- curl_multi_remove_handle($mh, $ch2);
- curl_multi_close($mh);
- /* Stage 2: fetch the linked pages so their title tags can be searched. */
- $handle = array();            // easy handles, in the order they were added
- $mhandle = curl_multi_init(); // multi handle for this batch
- // Options that are the same for every page in the batch.
- $shared_opts = array(
-     CURLOPT_HEADER => 0,
-     CURLOPT_RETURNTRANSFER => 1,
- );
- // Only the first 100 captured links are processed.
- $targets = array_slice($matches[1], 0, 100);
- foreach ($targets as $href) {
-     $tmp_h = curl_init();
-     // Array union keeps CURLOPT_URL first, followed by the shared options.
-     curl_setopt_array($tmp_h, array(CURLOPT_URL => $href) + $shared_opts);
-     curl_multi_add_handle($mhandle, $tmp_h);
-     array_push($handle, $tmp_h);
- }
- // Drive the second batch until no transfer is running or cURL reports an
- // error. $active receives the number of running transfers by reference;
- // every curl_multi_exec() status is kept in $mrc so failures are not lost.
- do {
-     $mrc = curl_multi_exec($mhandle, $active);
-     if ($mrc == CURLM_OK && $active) {
-         // Block until there is activity on any connection. On some
-         // platforms curl_multi_select() returns -1 immediately; sleep
-         // briefly in that case so the loop does not spin at full CPU.
-         if (curl_multi_select($mhandle) == -1) {
-             usleep(1000);
-         }
-     }
- } while ($active && ($mrc == CURLM_OK || $mrc == CURLM_CALL_MULTI_PERFORM));
- // Append the body of every fetched page to $mtext, detaching each easy
- // handle from the multi handle once its content has been read.
- $mtext = null;
- foreach ($handle as $tmp_h) {
-     $page_body = curl_multi_getcontent($tmp_h);
-     $mtext = $mtext . $page_body;
-     curl_multi_remove_handle($mhandle, $tmp_h);
- }
- $mmatches = array();
- preg_match_all("/
来源: https://www.php1.cn/detail/php-deb7fe56c0.html