龙空技术网

如何获取bing搜索页面数据

幸运辰星701 13

前言:

最近大家对“获取页面cookie”似乎比较关心,不少同学都想了解一些“获取页面cookie”的相关资讯。小编也在网上搜集了一些关于“获取页面cookie”的相关资讯,希望大家能喜欢,朋友们一起来学习一下吧!

bing搜索页面爬取已经被屏蔽,几乎不能正常返回结果,奈何用api是需要付费($)的。还是希望能够自己抓取数据,我研究了半天,终于发现了关键,就是在header中一定要带上你自己浏览器的真实cookie数据(我调试时用的是Edge浏览器)。代码最新出炉,保证现在能用(2024-4-11)。注意:请仅用于个人研究学习目的,并轻度使用;如果ip被封,本文作者概不负责。

def search_bing(query, count=15, timeout=10):
    """Fetch a Bing search results page and scrape its organic results.

    The request is sent with browser-like headers. The ``Cookie`` header
    contains a placeholder that must be replaced with a real cookie string
    copied from your own browser; per the accompanying article, Bing does
    not return usable results without it.

    Args:
        query: Search terms, sent as the ``q`` query parameter.
        count: Desired number of results; ``count + 1`` is sent as the
            ``count`` query parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A ``(results, response)`` tuple. ``results`` is a list of
        ``(title, link, digest)`` tuples, empty on failure. ``response`` is
        the ``requests.Response``, or ``None`` when the request itself
        raised before a response was received.
    """
    import requests
    from bs4 import BeautifulSoup
    from urllib.parse import urlencode

    # NOTE(review): the original URL literal was stripped when this snippet
    # was published (only a dangling quote remained). The standard Bing
    # search endpoint is assumed here; the original may have used a regional
    # host such as cn.bing.com -- confirm before relying on it.
    base_url = 'https://www.bing.com/search'
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        # NOTE(review): "br" and "zstd" bodies are only decoded when the
        # optional brotli / zstandard packages are installed -- verify.
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "max-age=0",
        # Placeholder: paste your own browser's cookie string here.
        "Cookie": '这里张贴你自己的cookie数据',
        "Sec-Ch-Ua": '"Not A(Brand";v="99.0.0.0", "Google Chrome";v="121.0.6167.140", "Chromium";v="121.0.6167.140"',
        "Sec-Ch-Ua-Arch": "x86",
        "Sec-Ch-Ua-Bitness": "64",
        "Sec-Ch-Ua-Full-Version": "121.0.6167.140",
        "Sec-Ch-Ua-Full-Version-List": '"Not A(Brand";v="99.0.0.0", "Google Chrome";v="121.0.6167.140", "Chromium";v="121.0.6167.140"',
        "Sec-Ch-Ua-Mobile": "?0",
        "Sec-Ch-Ua-Model": "",
        "Sec-Ch-Ua-Platform": "Windows",
        "Sec-Ch-Ua-Platform-Version": "10.0.0",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "none",
        "Sec-Fetch-User": "?1",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    }
    params = {
        'q': query,
        'count': count + 1,
    }
    url = base_url + '?' + urlencode(params)
    print(f'url=>{url}')

    rs = []
    # Bound up front so the except branch can always return it, even when
    # the request fails before a response object exists.
    response = None
    try:
        with requests.Session() as session:
            response = session.get(
                url,
                headers=headers,
                allow_redirects=True,
                timeout=timeout,
            )

        if response.status_code != 200:
            print('Failed to retrieve search results')
            return rs, response

        soup = BeautifulSoup(response.text, 'html.parser')

        # The class names below mirror Bing's markup at the time of writing
        # and may need adjusting when the page layout changes.
        for result in soup.find_all('li', class_='b_algo'):
            heading = result.find('h2')
            if heading is None:
                # Not a regular result entry; skip it rather than abort.
                continue

            anchor = result.find('a', class_='tilk')
            if anchor is None:
                # Fall back to the link inside the title heading.
                anchor = heading.find('a')
            link = anchor.get('href') if anchor is not None else None
            if not link:
                continue

            caption = result.find('div', class_=['b_caption'])
            digest = caption.get_text() if caption else ''
            rs.append((heading.get_text(), link, digest))
        return rs, response
    except Exception as ex:
        # Best-effort scraper: report the problem and hand back whatever was
        # collected so far instead of propagating.
        print(f'访问bing出错了:\n{ex}')
        return rs, response

标签: #获取页面cookie #怎么看网页数据格式是多少