利用BeautifulSoup库，获取前250本图书的信息。需要爬取的信息包括书名、书名的URL链接、作者、出版社和出版时间、书本价格、评分和评论，把这些信息存到txt文件，要求将这些信息对齐、排列整齐。（我是刚学习网络爬虫，代码如有错误望指正。）网址为 https://book.douban.com/top250 ，代码如下：
import requests
from bs4 import BeautifulSoup
import time
# Browser-like User-Agent header so douban.com serves the normal page
# instead of rejecting the request as a bot.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/91.0.4472.106 Safari/537.36'
}
# Accumulates one dict per book across all result pages.
book_list = []


def book_info(url):
    """Scrape one Douban Top-250 result page and append each book to book_list.

    For every book on the page the following fields are collected:
    book_name, book_url, line (author / publisher / date / price),
    mark (rating) and comtent (one-line comment).

    NOTE(review): zip() silently truncates to the shortest selector result —
    books that lack a 'span.inq' comment shift the pairing; verify selectors
    stay aligned on the live page.
    """
    book_data = requests.get(url, headers=headers)
    soup = BeautifulSoup(book_data.text, 'lxml')
    book_is = soup.select('div.pl2 a')          # title link: text + href
    lines = soup.select('p.pl')                 # author/publisher/date/price
    marks = soup.select('span.rating_nums')     # numeric rating
    comtents = soup.select('span.inq')          # one-line recommendation
    for book_i, line, mark, comtent in zip(book_is, lines, marks, comtents):
        parts = line.get_text().split('/')
        data = {
            # Title text contains layout newlines/spaces; strip them all.
            'book_name': book_i.get_text().replace('\n', '').replace(' ', ''),
            'book_url': book_i['href'],
            'line': ' '.join(parts),
            'mark': mark.get_text(),
            'comtent': comtent.get_text(),
        }
        book_list.append(data)


if __name__ == '__main__':
    # 10 pages x 25 books = Top 250; ?start= paginates in steps of 25.
    urls = ['https://book.douban.com/top250?start={}'.format(i)
            for i in range(0, 250, 25)]
    for url in urls:
        book_info(url)
        time.sleep(1)  # be polite: throttle requests to avoid being blocked

    # Open the output file once (original reopened it per record and leaked
    # the handle when a write raised); 'with' guarantees it is closed.
    with open(r'D:\Python爬虫\doubanbook.txt', 'a', encoding='utf-8') as fo:
        for word in book_list:
            try:
                fo.write('{} {} {} {}分 {}\n'.format(
                    word['book_name'], word['book_url'], word['line'],
                    word['mark'], word['comtent']))
                fo.write('\n')
            except UnicodeError:
                # UnicodeError covers both encode and decode failures
                # (original caught only UnicodeDecodeError, which cannot
                # occur when writing); skip the offending record.
                pass

# 结果部分截图 (the original post showed a screenshot of the results here)