龙空技术网

Bs4+Url+selenium爬取花瓣网

阿毛杂记 248

前言:

目前同学们对“css飘花瓣”都比较注意,小伙伴们都想要知道一些“css飘花瓣”的相关知识。那么小编在网摘上收集了一些有关“css飘花瓣”的相关知识,希望各位老铁们能喜欢,各位老铁们快快来学习一下吧!

# coding=utf-8
"""Crawl Huaban listing pages and download the image behind each pin.

Each listing page is rendered with a Selenium-driven browser, parsed with
BeautifulSoup, and every pin link found on it is opened in turn so that the
full-size image can be saved to the current working directory.
"""
import time
from urllib.parse import urljoin
from urllib.request import urlretrieve

from bs4 import BeautifulSoup
from selenium import webdriver


class crawl_huaban:
    """Downloader for the pins listed on a single page.

    Attributes:
        url: Address of the listing page whose pins are downloaded.
    """

    def __init__(self, url):
        """Remember the listing page address; no network access happens here."""
        self.url = url

    def getHtml(self, url):
        """Render ``url`` in a browser and return the parsed page.

        Args:
            url: Address of the page to load.

        Returns:
            A ``BeautifulSoup`` tree built from the rendered page source with
            the ``html5lib`` parser.
        """
        # NOTE(review): PhantomJS support was dropped from recent Selenium
        # releases; swap in a headless Chrome/Firefox driver if this fails.
        driver = webdriver.PhantomJS()
        try:
            driver.get(url)
            driver.implicitly_wait(3)
            return BeautifulSoup(driver.page_source, 'html5lib')
        finally:
            # Always shut the browser down, even when loading the page raised,
            # so no driver process is left behind.
            driver.quit()

    def getPage(self):
        """Render and parse the listing page given at construction time."""
        return self.getHtml(self.url)

    def getImage(self):
        """Download the image of every pin found on the listing page.

        Progress is printed to stdout. A pin whose image cannot be located or
        downloaded is reported and skipped; the remaining pins are still
        processed.
        """
        listing = self.getPage()
        anchors = listing.find_all("a", class_="img x layer-view loaded")
        # The first two matches are discarded, as in the original script
        # (the reason is not stated there).
        pins_ids = [anchor.get('href') for anchor in anchors][2:]

        for total, pinid in enumerate(pins_ids, start=1):
            print('第{0}张照片'.format(total))
            # Resolve the href against the listing page address, so both
            # absolute and site-relative links end up as a full URL.
            img_url = urljoin(self.url, pinid)
            img_src_url = self._find_image_src(img_url)
            if img_src_url is None:
                print("获取图片:%s失败,跳过,获取下一张!" % img_url)
                continue
            try:
                # NOTE(review): the href is used verbatim as the file name; if
                # it contains path separators the target directory must
                # already exist. Confirm the href format against the live site.
                urlretrieve(img_src_url, '%s.jpg' % pinid)
                print("获取图片:%s成功!" % img_src_url)
            except Exception:
                # Best-effort download: report and move on. Unlike a bare
                # ``except``, this lets Ctrl-C still interrupt the crawl.
                print("获取图片:%s失败,跳过,获取下一张!" % img_src_url)
        print("获取图片完毕")

    def _find_image_src(self, pin_url):
        """Return the image address shown on a pin page, or None if absent."""
        pin_page = self.getHtml(pin_url)
        holder = pin_page.find("div", class_="image-holder")
        image = holder.find("img") if holder is not None else None
        src = image.get("src") if image is not None else None
        if not src:
            return None
        # The page is expected to give a scheme-less address; the original
        # script prefixes it with "http:".
        return 'http:%s' % src


def main():
    """Crawl listing pages 1 through 10 and print the time spent on each."""
    for i in range(1, 11):
        print('第{0}页'.format(i))
        # NOTE(review): the scheme, host and path of this address appear to
        # have been lost when the script was published; restore the full
        # listing URL before running.
        url = ';izxnwygj&page={0}&per_page=20&wfl=1'.format(i)
        crawler = crawl_huaban(url)
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # wall-clock replacement.
        start = time.perf_counter()
        crawler.getImage()
        end = time.perf_counter()
        print('总共用时:%03f seconds\n\n' % (end - start))


if __name__ == '__main__':
    main()

标签: #css飘花瓣