导读:
使用 curl 发起请求时附带与浏览器一致的请求头信息,可以让请求更接近真人通过浏览器的访问,不容易被识别为爬虫(采集)程序,从而有助于通过 Incapsula 等安全验证机制。
1、首先使用浏览器(示例使用的是火狐浏览器)访问接口网址,按 F12 打开开发者工具,在"网络"面板中选中该请求,查看其请求头信息,如下:
2、实现代码:
-
<?php
-
/**
 * Fetch a URL with cURL while sending browser-like request headers.
 *
 * The request is a GET that follows up to 10 redirects, transparently
 * decodes compressed responses, and treats HTTP status codes >= 400 as
 * failures (CURLOPT_FAILONERROR).
 *
 * On failure the cURL error text is echoed with an "Error:" prefix and
 * false is returned; the handle is always released before returning.
 *
 * @param string $url Absolute URL to request.
 *
 * @return bool|string Response body on success, false on failure.
 */
function fetch_url($url)
{
    $header = FormatHeader($url);
    $useragent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:83.0) Gecko/20100101 Firefox/83.0';
    // Used for both the connect timeout and the overall transfer timeout (seconds).
    $timeout = 120;

    $ch = curl_init($url);
    if ($ch === false) {
        echo 'Error:curl_init() failed';

        return false;
    }

    curl_setopt_array($ch, array(
        // Report HTTP status codes >= 400 as a cURL error.
        CURLOPT_FAILONERROR    => true,
        // Send the browser-like request headers.
        CURLOPT_HTTPHEADER     => $header,
        // Do not include response headers in the returned body.
        CURLOPT_HEADER         => 0,
        // SECURITY: certificate and host-name verification are disabled, so
        // HTTPS responses are NOT authenticated. Kept for backward
        // compatibility; enable both options when the target has a valid
        // certificate.
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_SSL_VERIFYHOST => false,
        CURLOPT_FOLLOWLOCATION => true,
        // Empty string: decode every content encoding this libcurl supports.
        CURLOPT_ENCODING       => "",
        // Return the body from curl_exec() instead of printing it.
        CURLOPT_RETURNTRANSFER => true,
        // Set the Referer header automatically when following redirects.
        CURLOPT_AUTOREFERER    => true,
        CURLOPT_CONNECTTIMEOUT => $timeout,
        CURLOPT_TIMEOUT        => $timeout,
        CURLOPT_MAXREDIRS      => 10,
        CURLOPT_USERAGENT      => $useragent,
    ));

    $content = curl_exec($ch);

    // Capture the error state before the handle is released.
    $errno = curl_errno($ch);
    $error = curl_error($ch);
    curl_close($ch);

    if ($errno !== 0) {
        echo 'Error:' . $error;

        return false;
    }

    return $content;
}
-
-
/**
 * Build the browser-like request headers sent along with a request to $url.
 *
 * Only real header lines are returned. The HTTP request line (method, path
 * and protocol version) is generated by cURL itself and must not be passed
 * through CURLOPT_HTTPHEADER.
 *
 * Host and Referer are derived from the URL: a non-default port is kept and
 * the Referer uses the URL's own scheme (falling back to "http"). When the
 * URL has no parsable host, both headers are omitted.
 *
 * @param string $url Absolute URL the headers are built for.
 *
 * @return array List of "Name: value" header lines.
 */
function FormatHeader($url)
{
    // Split the URL into its components; parse_url() returns false for
    // seriously malformed URLs.
    $temp = parse_url($url);

    $header = array();

    if (is_array($temp) && isset($temp['host'])) {
        // Host must carry the port whenever the URL specifies one.
        $authority = $temp['host'];
        if (isset($temp['port'])) {
            $authority .= ':' . $temp['port'];
        }

        $scheme = isset($temp['scheme']) ? $temp['scheme'] : 'http';

        $header[] = "Host: {$authority}";
        $header[] = "Referer: {$scheme}://{$authority}/";
    }

    $header[] = "Content-Type: text/xml; charset=utf-8";
    $header[] = 'Accept: application/json, text/javascript, */*; q=0.01';
    // NOTE(review): this advertises "br"; the response can only be decoded
    // if the local libcurl was built with brotli support - confirm.
    $header[] = 'Accept-Encoding:gzip, deflate, br';
    $header[] = 'Accept-Language:zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2';
    $header[] = 'Connection:keep-alive';
    $header[] = 'User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:83.0) Gecko/20100101 Firefox/83.0';
    $header[] = 'X-Requested-With: XMLHttpRequest';

    return $header;
}
-
?>
3、调用示例:
-
<?php
-
// lcg_value() returns a pseudo-random float in the range (0, 1); appending it
// as the "v" query parameter makes every request URL unique.
$url = sprintf('http://www.xxx.com/getdata.php?v=%s', lcg_value());

// Request the URL and keep the response body.
$html = fetch_url($url);
转载:https://blog.csdn.net/qq15577969/article/details/110913311
查看评论