小言_互联网的博客

爬虫之全国气温最低的十个城市

300人阅读  评论(0)
# Module metadata recorded by the original author.
__author__ = '田明博'
__date__ = '2019/10/9 21:23'
# Summary (translation of the string below): fetch the weather forecast for
# every city and rank the cities by lowest temperature.
'''
获取所有城市的天气预报,按最低温度排名
'''
import requests
import operator
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt


def get_page(link, timeout=10):
    '''
    Download one regional forecast page and extract a temperature per city.

    :param link: URL of the regional forecast page to fetch.
    :param timeout: Seconds to wait for the server before giving up; passed
        straight to ``requests.get`` so a stalled connection cannot hang the
        whole script.
    :return: List of ``{'city': str, 'temp': int}`` dicts, one per table row
        that could be parsed. Empty when the page has no ``conMidtab`` block.
    :raises requests.RequestException: If the request fails, times out, or
        the server answers with an HTTP error status.
    '''
    resp = requests.get(link, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    resp.raise_for_status()
    resp.encoding = 'utf-8'
    # html5lib parser; the original author notes it is slower than lxml.
    soup = BeautifulSoup(resp.text, 'html5lib')
    # Only the first 'conMidtab' div is used. Per the original author's note
    # the page carries one such div per day of the week and the first one is
    # the current day.
    conMidtab = soup.find('div', attrs={'class': 'conMidtab'})
    if conMidtab is None:
        # Layout changed or unexpected page: nothing to extract.
        return []

    all_temp = []
    for table in conMidtab.find_all('table'):
        # The first two rows of each table are skipped (presumably headers).
        for index, tr in enumerate(table.find_all('tr')[2:]):
            tds = tr.find_all('td')
            if len(tds) < 2:
                # Need both a city cell and the second-to-last cell.
                continue
            # In the first data row the city sits in the second cell
            # (presumably the first cell holds a row-spanning province name
            # -- confirm against the live page).
            city_td = tds[1] if index == 0 else tds[0]
            city_strings = list(city_td.stripped_strings)
            temp_strings = list(tds[-2].stripped_strings)
            if not city_strings or not temp_strings:
                continue
            try:
                temp = int(temp_strings[0])
            except ValueError:
                # Non-numeric placeholder in the temperature cell: skip the
                # row rather than abort the whole scrape.
                continue
            all_temp.append({'city': city_strings[0], 'temp': temp})
    return all_temp


def show_charset(min_temp_citys):
    '''
    Draw a bar chart of the given cities and their temperatures.

    :param min_temp_citys: Iterable of dicts, each with a 'city' key (bar
        label) and a 'temp' key (bar height, converted with ``int``).
    :return: None; the chart is displayed with ``plt.show()``.
    '''
    # Resolve each record to a (label, height) pair in one pass.
    bars = [(record['city'], int(record['temp'])) for record in min_temp_citys]
    labels = [name for name, _ in bars]
    heights = [value for _, value in bars]

    # SimHei lets matplotlib render the Chinese labels.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    # Needed so the minus sign of negative values renders correctly.
    plt.rcParams['axes.unicode_minus'] = False

    plt.title('全国温度最低的前十城市')
    plt.xlabel('城市')  # x axis: city
    plt.ylabel('温度℃')  # y axis: temperature
    plt.bar(labels, heights)
    plt.show()


def main():
    '''
    Scrape every regional page, then print and plot the ten lowest readings.

    Each link is fetched with ``get_page``. The combined records are sorted
    ascending by their 'temp' value, the first ten are printed, and the same
    ten are passed to ``show_charset``.
    '''
    # Regional forecast pages on weather.com.cn.
    links = ['http://www.weather.com.cn/textFC/hb.shtml',
             'http://www.weather.com.cn/textFC/db.shtml',
             'http://www.weather.com.cn/textFC/hd.shtml',
             'http://www.weather.com.cn/textFC/hz.shtml',
             'http://www.weather.com.cn/textFC/hn.shtml',
             'http://www.weather.com.cn/textFC/xb.shtml',
             'http://www.weather.com.cn/textFC/xn.shtml', ]
    all_infos = []
    for link in links:
        # Extend in place: avoids rebuilding the list on every iteration and
        # avoids a local named `all`, which would shadow the built-in.
        all_infos.extend(get_page(link))
    min_temp_ten = sorted(all_infos, key=operator.itemgetter('temp'))[:10]
    print(min_temp_ten)
    show_charset(min_temp_ten)


# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

运行截图:

采用matplotlib库绘制。


转载:https://blog.csdn.net/T_I_A_N_/article/details/102480757
查看评论
* 以上用户言论只代表其个人观点,不代表本网站的观点或立场