前言:
此时大家对“获取页面cookie”大概比较关心,同学们都想要了解一些“获取页面cookie”的相关资讯。那么小编也在网上搜集了一些关于“获取页面cookie”的相关资讯,希望大家能喜欢,朋友们一起来学习一下吧!bing搜索页面爬取已经被屏蔽,几乎不能正常返回结果,奈何用api是需要$的。还是希望能够自己抓取数据,我研究了半天,终于发现了关键,就是在header中一定要带上你自己浏览器的真实cookie数据(我调试时用的是Edge浏览器),代码最新出炉,保证现在能用(2024-4-11)。注意,请仅仅用于个人研究学习目的,并轻度使用,如果ip被封,本文作者概不负责。
def search_bing(query, count=15):
    """Scrape Bing web-search results for *query*.

    Sends a GET request to the Bing search endpoint with browser-like
    headers (a real browser Cookie must be pasted into the header dict
    for Bing to return results) and parses the result cards out of the
    returned HTML.

    Args:
        query: The search phrase.
        count: Number of results to request (sent to Bing as ``count+1``).

    Returns:
        A ``(rs, response)`` tuple where ``rs`` is a list of
        ``(title, link, digest)`` string triples and ``response`` is the
        raw ``requests.Response`` — or ``None`` if the request itself
        failed before a response was received.
    """
    import requests
    from bs4 import BeautifulSoup
    from urllib.parse import urlencode

    # NOTE(review): the original post's URL text was destroyed by the
    # site scraper (`base_url = ';`). This is the standard Bing search
    # endpoint — confirm it matches what the author intended.
    base_url = 'https://www.bing.com/search'
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "max-age=0",
        "Cookie": '这里张贴你自己的cookie数据',
        "Sec-Ch-Ua": '"Not A(Brand";v="99.0.0.0", "Google Chrome";v="121.0.6167.140", "Chromium";v="121.0.6167.140"',
        "Sec-Ch-Ua-Arch": "x86",
        "Sec-Ch-Ua-Bitness": "64",
        "Sec-Ch-Ua-Full-Version": "121.0.6167.140",
        "Sec-Ch-Ua-Full-Version-List": '"Not A(Brand";v="99.0.0.0", "Google Chrome";v="121.0.6167.140", "Chromium";v="121.0.6167.140"',
        "Sec-Ch-Ua-Mobile": "?0",
        "Sec-Ch-Ua-Model": "",
        "Sec-Ch-Ua-Platform": "Windows",
        "Sec-Ch-Ua-Platform-Version": "10.0.0",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "none",
        "Sec-Fetch-User": "?1",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    }
    params = {
        'q': query,
        'count': count + 1,
    }
    url = base_url + '?' + urlencode(params)
    print(f'url=>{url}')

    rs = []
    # Bug fix: initialize so the except branch cannot raise NameError
    # when session.get() itself fails before a response exists.
    response = None
    try:
        with requests.Session() as session:
            response = session.get(url, headers=headers, allow_redirects=True)
            if response.status_code == 200:
                soup = BeautifulSoup(response.text, 'html.parser')
                # These class names track Bing's markup and may change
                # whenever Bing updates its result page.
                results = soup.find_all('li', class_='b_algo')
                for result in results:
                    h2 = result.find('h2')
                    anchor = result.find('a', class_='tilk')
                    # Bug fix: skip malformed result cards instead of
                    # crashing on .get_text()/['href'] of None.
                    if h2 is None or anchor is None:
                        continue
                    title = h2.get_text()
                    link = anchor['href']
                    el = result.find('div', class_=['b_caption'])
                    digest = el.get_text() if el else ''
                    rs.append((title, link, digest))
                return rs, response
            else:
                print('Failed to retrieve search results')
                return rs, response
    except Exception as ex:
        print(f'访问bing出错了:\n{ex}')
        return rs, response
版权声明:
本站文章均来自互联网搜集,如有侵犯您的权益,请联系我们删除,谢谢。
标签: #获取页面cookie #怎么看网页数据格式是多少