代码:
1 # coding=utf-8
2 import sys
3 import csv
4 import requests
5 from bs4 import BeautifulSoup
6
# Python 2 only: reload(sys) is called first so that the
# sys.setdefaultencoding call on the next line is available.
# NOTE(review): neither name exists as written on Python 3 -- confirm the
# target interpreter before porting.
reload(sys)
# Use UTF-8 as the interpreter-wide default string encoding.
sys.setdefaultencoding('utf-8')
# Request header settings
# NOTE(review): no header setup actually follows in the visible source.
10
def _strip_blanks(text):
    """Return *text* with every space and newline character removed."""
    return text.replace(' ', '').replace('\n', '')


def download(url):
    """Scrape one listing page and append one CSV row per listing to kf.csv.

    The page at *url* is fetched with ``requests`` and parsed with
    BeautifulSoup using the ``lxml`` parser.  Five CSS selections are made
    below ``div.listBox > ul > li`` (title link, ``p.room``, the first and
    second link inside ``p.add``, and the ``div.money > b`` price) and are
    walked in lockstep with ``zip``, so iteration stops at the shortest one.

    Each row holds six fields: the title, the first and the last
    space-separated piece of the ``p.room`` text, the two address links and
    the price.  Spaces and newlines are removed from every field.
    ``'write one house'`` is printed once per row.

    :param url: address of the listing page to fetch.
    """
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'lxml')

    # Every selector shares the same path down to the per-listing <li>.
    item = 'body > div.mainbox > div.main > div.content > div.listBox > ul > li'
    details = item + ' > div.des'
    titles = soup.select(details + ' > h2 > a:nth-of-type(1)')
    houses = soup.select(details + ' > p.room')
    first_addresses = soup.select(details + ' > p.add > a:nth-of-type(1)')
    second_addresses = soup.select(details + ' > p.add > a:nth-of-type(2)')
    prices = soup.select(item + ' > div.listliright > div.money > b')

    listings = zip(titles, houses, first_addresses, second_addresses, prices)

    # Open the output once for the whole page instead of once per listing;
    # the with-block closes the handle even if a write raises.
    # 'ab' (binary append) is the mode the Python 2 csv module expects.
    with open('kf.csv', 'ab') as csvfile:
        writer = csv.writer(csvfile)
        for title, house, first_address, second_address, price in listings:
            room_parts = house.get_text().split(' ')
            row = (
                # NOTE(review): title.string may be None when the link has
                # nested markup, which would store the text 'None' -- verify
                # against the real page.
                _strip_blanks(str(title.string)),
                _strip_blanks(room_parts[0]),
                _strip_blanks(room_parts[-1]),
                _strip_blanks(first_address.get_text()),
                _strip_blanks(second_address.get_text()),
                _strip_blanks(price.get_text()),
            )
            print('write one house')
            writer.writerow(row)
40
41
# Initialise the CSV file
def info():
    """Append the header row to kf.csv.

    The file is opened in append mode, so existing content is kept and a
    header row is added on every call.
    """
    # 'ab' (binary append) is the mode the Python 2 csv module expects; the
    # with-block guarantees the handle is closed even if the write raises.
    with open('kf.csv', 'ab') as csvinfo:
        csv.writer(csvinfo).writerow(
            ['title', 'house', 'area', 'address1', 'address2', 'price'])
48
49
if __name__ == '__main__':
    # No module-level ``url`` is defined in this file, so the page address is
    # taken from the command line.  It is checked before anything is written
    # so that a missing argument does not leave a stray header row in kf.csv.
    if len(sys.argv) != 2:
        sys.exit('usage: python %s <listing-page-url>' % sys.argv[0])
    info()
    download(sys.argv[1])
在学习过程中有什么不懂的,可以加我的
Python学习交流QQ群:784758214
群里有不错的学习视频教程、开发工具与电子书籍。
与你分享当下企业对Python人才的需求,以及如何从零基础学好Python、需要学习哪些内容
转载:https://blog.csdn.net/meiguanxi7878/article/details/102490421
查看评论