阅读:2399回复:0
使用CURL抓取淘宝页面集成方法
代码如下:
/**
 * Fetch the HTML of a Taobao page with cURL.
 *
 * The request is sent with a fixed set of browser-like headers (User-Agent,
 * Accept, Accept-Language and a hard-coded Cookie) and follows HTTP redirects.
 *
 * @param string $url Address of the page to fetch.
 * @return string|false The response body on success; false when $url is empty,
 *                      when the cURL handle cannot be created, or when the
 *                      transfer fails.
 */
public function getTaoBaoHtml($url)
{
    if (empty($url)) {
        return false;
    }

    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    // Target address.
    curl_setopt($ch, CURLOPT_URL, $url);

    // Browser-like request headers.
    // NOTE(review): every value is wrapped in literal "{...}" and the Cookie value
    // contains spaces inside percent-encoded sequences; both look like paste
    // artifacts but are sent verbatim here - confirm against a real browser request.
    // NOTE(review): the Cookie appears to hold session-specific tokens captured from
    // one browser session; presumably it expires and should be supplied by the
    // caller or configuration - confirm.
    curl_setopt($ch, CURLOPT_HTTPHEADER, array(
        "User-Agent: {Mozilla/5.0 (Windows NT 6.1; WOW64; rv:26.0) Gecko/20100101 Firefox/26.0}",
        "Accept: {text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8}",
        "Accept-Language: {zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3}",
        "Cookie:{cq=ccp%3D1; cna=a7suCzOmSTECAXgg9iCf4AtX; t=671b2069c7e8ac444da66d664a397a5f; tracknick=%5Cu4F0D%5Cu6653%5Cu8F8901; _tb_token_=nDiU1vCuzFd0; cookie2=c54709ffbe04a5ccb80283c34d6b00fa; pnm_cku822=128WsMPac%2FFS4KgNn%2BYfhzduo4U2NC0zh9cAS4%3D% 7CWUCLjKhqr873bOIFQcMecSw%3D%7CWMEKRlV% 2B3D9a6XWaidNWNQOSWXwaXugvQHzhxALh%7CX0 YLbX78NUR2b2DHoxnIqZENQqR35TBZbfQ5vooI0b6GHZA3U1kr%7CXkdILog Cr878ZK9I%2B%2FE3QjAD3lFJJaAZRA%3D%3D%7CXUeMwMR2s% 2BTUQk8IPP5TNgWfUjQwonccMCxihTa0fRYgtjgfa4j6%7CXMY K7F8liOvH3hMUpzXkiaU%2FJw%3D%3D}",
    ));

    // Request the response body (NOBODY off) ...
    curl_setopt($ch, CURLOPT_NOBODY, 0);
    // ... and keep the response headers out of the returned data (HEADER off).
    curl_setopt($ch, CURLOPT_HEADER, 0);
    // Have curl_exec() return the body instead of printing it, so no output
    // buffering is needed and a failed transfer is reported as false.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // Follow "Location:" redirects.
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);

    $html = curl_exec($ch);
    curl_close($ch);

    return $html;
}

由于工作需要抓取淘宝页面,发现抓取不是那么容易,网上收集的~~~ |
|