飞道的博客

python_requests直接调用美团API爬取美团评论

754人阅读  评论(0)

思路:
1、获取一个景点的id,例如
https://www.meituan.com/zhoubianyou/40760851/
URL 路径末尾的那串数字(此处为 40760851)就是该景点的 id
2、直接调用
https://www.meituan.com/ptapi/poi/getcomment
将id和offset参数传进去,pageSize大于50即可,无需登录

import os
import random
import requests
from spider.get_ip import getIpList, getHeader
from utils.read_write import writeOneCsv, readCsv, writeOneJson, readJson, writeCsv
from utils.time_change import timestamp_datetime

# Directory that receives one JSON file per (id, offset) request.
# Backslashes are escaped explicitly: sequences such as "\d" or "\m" are not
# valid string escapes and trigger a SyntaxWarning on recent Python versions.
save_dir = 'F:\\data\\other\\meituan\\json\\'
# Pools of proxies and request headers; one of each is picked at random
# for every request.
proxys = getIpList()
headers = getHeader()
# Work from the data directory so relative paths resolve against it.
os.chdir(r'F:\data\other\meituan')

def search(id, offset, timeout=10):
    """Fetch one batch of comments for a Meituan POI and save it as JSON.

    The ``ptapi/poi/getcomment`` endpoint is called with a randomly chosen
    header set and proxy. When the response reports ``total > 0`` the whole
    response is written to ``<save_dir><id>_<offset>.json``; otherwise a
    short diagnostic is printed. Any failure is printed together with the
    URL and is not raised, so a crawl loop keeps going.

    Args:
        id: POI id, i.e. the trailing number of the POI page URL.
        offset: Value sent as the ``offset`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without
            it a stalled proxy would block the crawl indefinitely.
    """
    url = 'https://www.meituan.com/ptapi/poi/getcomment?id={}&offset={}&pageSize=10000&mode=0&sortType=1'.format(id,offset)
    try:
        # NOTE: no retry policy is configured; a failed request is only
        # reported below and the caller moves on.
        r = requests.get(
            url,
            headers=random.choice(headers),
            proxies=random.choice(proxys),
            timeout=timeout,
        )
        dic = r.json()
        if isinstance(dic, dict) and 'total' in dic:
            if dic['total'] > 0:
                writeOneJson(dic, save_dir + str(id) + '_' + str(offset) + '.json')
            else:
                # The endpoint answered but reported no comments.
                print('xiaoyu')
        else:
            # Unexpected payload (no 'total' field): log the URL for follow-up.
            print('total')
            print(url)
    except Exception as e:
        # Deliberately broad: this is a best-effort crawl step, so network,
        # proxy, JSON-decoding and file errors are reported, not propagated.
        print(e)
        print(url)

def dealData():
    """Flatten every saved JSON file in ``save_dir`` into a single CSV.

    Each output row is ``[file stem, comment text, converted comment time]``
    and all rows are written to '所有景点的评论数据1.csv' in one call.
    """
    data = []
    for file_name in os.listdir(save_dir):
        # File stem; for files written by search() this is "<id>_<offset>".
        source_id = file_name.split('.')[0]
        loaded = readJson(save_dir + file_name)
        if isinstance(loaded, dict):
            # search() stores the whole response object, so iterating it
            # directly would yield its keys rather than comment records.
            # NOTE(review): the record list is presumed to live under
            # 'comments' -- verify against a saved response file.
            records = loaded.get('comments') or []
        else:
            # Already a plain list of comment records.
            records = loaded
        for one in records:
            comment = one['comment']
            commentTime = timestamp_datetime(one['commentTime'])
            data.append([source_id, comment, commentTime])
    writeCsv(data,'所有景点的评论数据1.csv')

if __name__ == '__main__':
    # dealData()
    # Directory holding the "<id>_0.json" files from an earlier crawl; each
    # one supplies the total comment count for its POI. Backslashes are
    # escaped explicitly to avoid invalid escape sequences.
    old_dir = 'F:\\data\\other\\meituan\\json_old\\'
    filename = 'url.csv'
    data = readCsv(filename)
    # The first one has not finished crawling yet.
    for one in data:
        # The POI id is the fifth '/'-separated piece of the page URL,
        # e.g. https://www.meituan.com/zhoubianyou/40760851/ -> 40760851.
        poi_id = one[0].split('/')[4]
        first_page = readJson(old_dir + str(poi_id) + '_' + str(0) + '.json')
        length = first_page['total']
        # Floor division is required: range() rejects the float that true
        # division produces on Python 3.
        # NOTE(review): the loop index itself is sent as `offset`; confirm
        # whether the API expects a record offset (i * 50) instead.
        for i in range(0, length // 50 + 1):
            search(poi_id, i)

转载:https://blog.csdn.net/qq_30803353/article/details/108027500
查看评论
* 以上用户言论只代表其个人观点,不代表本网站的观点或立场