小言_互联网的博客

QQ音乐歌单爬虫

482人阅读  评论(0)


想要下载歌单里的歌曲

点进一首歌,F12,找到可下载的url

对比几首歌,发现url不一样的地方只有C400后面这段还有vkey

http://ws.stream.qqmusic.qq.com/C400003475RF1Uq3HE.m4a?guid=6611369584&vkey=E91FF64079410B1FB46B35905F98B6C11A8513796747E44FEFF90A9684C55A09367063B069E673AFAE07C11DF4A5155C652F8EB58CA1C4B7&uin=0&fromtag=66

回到歌单页面,按F12打开开发者工具,在请求列表中过滤fcg

发现mid字段就是我们需要的

现在回到播放器找vkey
搜索key

purl:C400003475RF1Uq3HE.m4a?guid=6611369584&vkey=E91FF64079410B1FB46B35905F98B6C11A8513796747E44FEFF90A9684C55A09367063B069E673AFAE07C11DF4A5155C652F8EB58CA1C4B7&uin=0&fromtag=66

拼接两段网址发现:
http://ws.stream.qqmusic.qq.com/+purl

可以播放
于是我们现在只要想办法得到这个网址

https://u.y.qq.com/cgi-bin/musicu.fcg?-=getplaysongvkey15198797315281087&g_tk=5381&loginUin=0&hostUin=0&format=json&inCharset=utf8&outCharset=utf-8&notice=0&platform=yqq.json&needNewCode=0&data=%7B%22req%22%3A%7B%22module%22%3A%22CDN.SrfCdnDispatchServer%22%2C%22method%22%3A%22GetCdnDispatch%22%2C%22param%22%3A%7B%22guid%22%3A%226611369584%22%2C%22calltype%22%3A0%2C%22userip%22%3A%22%22%7D%7D%2C%22req_0%22%3A%7B%22module%22%3A%22vkey.GetVkeyServer%22%2C%22method%22%3A%22CgiGetVkey%22%2C%22param%22%3A%7B%22guid%22%3A%226611369584%22%2C%22songmid%22%3A%5B%22003FvdBu3wRN6v%22%5D%2C%22songtype%22%3A%5B0%5D%2C%22uin%22%3A%220%22%2C%22loginflag%22%3A1%2C%22platform%22%3A%2220%22%7D%7D%2C%22comm%22%3A%7B%22uin%22%3A0%2C%22format%22%3A%22json%22%2C%22ct%22%3A24%2C%22cv%22%3A0%7D%7D

对比不同歌曲的请求发现,区分每首歌的地方只有getplaysongvkey后面的这串数字和songmid
尝试后发现,getplaysongvkey后面的这串数字填什么都无所谓
所以只需要之前从歌单接口得到的songmid(即mid字段)

正向步骤如下:
1.获取歌曲songmid
2.通过songmid获取vkey
3.通过vkey组合的下载链接进行歌曲获取

代码:

import requests
import json
import os

# Playlist-detail endpoint. The `disstid` query parameter carries the id of the
# playlist to crawl; the same id appears in the Referer header below.
starturl ='https://c.y.qq.com/qzone/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?type=1&json=1&utf8=1&onlysong=0&new_format=1&disstid=7195631775&g_tk=5381&loginUin=0&hostUin=0&format=json&inCharset=utf8&outCharset=utf-8&notice=0&platform=yqq.json&needNewCode=0'
print(starturl)

# HTTP headers sent with the playlist request and with each audio download.
# NOTE(review): the cookie looks like a captured browser session (it contains
# skey / p_skey / lskey values) — confirm it is safe to publish and consider
# loading it from the environment instead of hard-coding it.
headers = {
        "cookie": 'RK=51FHFw4aE8; pgv_pvi=8430643200; '
                  'ptcz=83cfc479ce75c5a1416df7d87136166109888f38587d9944738abca7ab77d17c; '
                  'tvfe_boss_uuid=e4ba183f02ae980f; pgv_pvid=3169027098; pgv_pvid_new=2426636288_14882e87533; '
                  'mobileUV=1_15f666e2b04_e8a50; pac_uid=1_1278077260; eas_sid=l1C5q306s9W2d845F9u7f1K1U6; '
                  'ptui_loginuin=40370953; o_cookie=1278077260; luin=o1278077260; '
                  'sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%221669eddcdc5156-0905303c6ff588-7d113749-1049088'
                  '-1669eddcdc83f8%22%2C%22%24device_id%22%3A%221669eddcdc5156-0905303c6ff588-7d113749-1049088'
                  '-1669eddcdc83f8%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6'
                  '%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22'
                  '%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5'
                  '%BC%80%22%7D%7D; '
                  'lskey=00010000a5727043706a88a2aebf6044daf687035fcc0804760fd13cac0729275356f7aa88d5157b46210ea6; '
                  'LW_sid=y1s5J425D4j7u9N1Q8Q0j2k383; LW_uid=p1q5u4d584A7f971l820z2k3M9; ts_uid=4705118039; yq_index=0; '
                  'uin=o1278077260; skey=@mXN9mj3as; p_uin=o1278077260; '
                  'pt4_token=cVwioR9KifEllUyD2CPEXz692iNhDH8JE-YwH*5TlRY_; '
                  'p_skey=BE7HSxnTeFIPwrO6sJ*YXyA1xKGxT072f5YAo919LSY_; yqq_stat=0; pgv_si=s3828307968; '
                  'pgv_info=ssid=s3773836208; ts_last=y.qq.com/n/yqq/toplist/4.html; '
                  'ts_refer=link.zhihu.com/%3Ftarget%3Dhttps%253A//y.qq.com/n/yqq/toplist/4.html%2523stat%253Dy_new'
                  '.toplist.menu.4',
        "user-agent": 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3493.3 '
                      'Safari/537.36 ',
        'Referer':'https://y.qq.com/n/yqq/playsquare/7195631775.html'
    }

# Step 1: fetch the playlist description and decode the JSON body.
res = requests.get(url=starturl, headers=headers)
res = json.loads(res.text)
disstid = res["cdlist"][0]["disstid"]

# All songs of one playlist are stored under music/<playlist id>/.
path = 'music/{}'.format(disstid)
if not os.path.exists(path):
    os.makedirs(path)
    print(path+' 创建成功')
else:
    print(path+'目录已存在')

# Map songmid -> song title. A mid that occurs twice keeps its last title.
mid_name = {i["mid"]: i["name"] for i in res["cdlist"][0]["songlist"]}

# Characters that are path separators or are rejected in file names on Windows;
# song titles are free text, so they are replaced before building a path.
_unsafe_chars = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})

# Step 2 endpoint: the only per-song part is the songmid substituted for `{}`.
vkey_url_template = "https://u.y.qq.com/cgi-bin/musicu.fcg?-=getplaysongvkey00019064020164671902&g_tk=5381&loginUin=0" \
                    "&hostUin=0&format=json&inCharset=utf8&outCharset=utf-8&notice=0&platform=yqq.json&needNewCode=0&data" \
                    "=%7B%22req_0%22%3A%7B%22module%22%3A%22vkey.GetVkeyServer%22%2C%22method%22%3A%22CgiGetVkey%22%2C" \
                    "%22param%22%3A%7B%22guid%22%3A%226611369584%22%2C%22songmid%22%3A%5B%22{}%22%5D%2C" \
                    "%22songtype%22%3A%5B0%5D%2C%22uin%22%3A%220%22%2C%22loginflag%22%3A1%2C%22platform%22%3A%2220%22%7D" \
                    "%7D%2C%22comm%22%3A%7B%22uin%22%3A0%2C%22format%22%3A%22json%22%2C%22ct%22%3A24%2C%22cv%22%3A0%7D%7D"

for j in mid_name:
    vkey_url = vkey_url_template.format(j)

    # Step 2: ask for the signed relative URL (`purl`) of this song.
    # A failure here skips the song instead of aborting the whole playlist.
    try:
        res02 = json.loads(requests.get(url=vkey_url).text)
        vkey = res02["req_0"]["data"]["midurlinfo"][0]["purl"]
    except (requests.RequestException, ValueError, KeyError, IndexError) as err:
        print("{} 获取vkey失败: {!r}".format(mid_name[j], err))
        continue
    if not vkey:
        # An empty purl would turn the download URL into the bare host name.
        print("{} 没有可用的下载地址".format(mid_name[j]))
        continue
    url = "http://dl.stream.qqmusic.qq.com/" + vkey

    # Build the target path from `path` so the directory created above and the
    # file location can never disagree.
    filename = path + "/" + mid_name[j].translate(_unsafe_chars) + ".m4a"
    if os.path.exists(filename):
        print(filename+" 已下载")
        continue

    # Step 3: download the audio. Only network and file-system errors are
    # treated as "skip this song"; anything else is a bug and should surface.
    print(filename)
    try:
        res03 = requests.get(url=url, headers=headers)
        # Do not save an HTTP error page under an .m4a name.
        res03.raise_for_status()
        with open(filename, "wb") as f:
            f.write(res03.content)
    except (requests.RequestException, OSError) as err:
        print("{} 下载失败: {!r}".format(filename, err))
        continue

下载其他歌单,只要把starturl里的disstid参数改成对应歌单的id就可以了哦~

这里给出一段爬取了2w多条url的代码

import pymysql
import requests
import json
import pprint

# Connection settings for the MySQL server on localhost that stores the results.
db_settings = {
    "host": "localhost",
    "port": 3306,
    "user": "root",
    "password": "2580",
    "db": "MUSIC",
}
db = pymysql.connect(**db_settings)
cursor = db.cursor()

# The string literal below is never executed. It keeps the one-off statements
# that drop and re-create the QQMUSIC table (SONGMID primary key, NAME, URL).
'''
cursor.execute("DROP TABLE IF EXISTS QQMUSIC")
sql = """CREATE TABLE QQMUSIC (
         SONGMID CHAR(20) NOT NULL PRIMARY KEY,
         NAME    VARCHAR(255),
         URL   VARCHAR(255))"""
cursor.execute(sql)
'''

# HTTP headers sent with the category-listing and playlist-detail requests
# made in func().
# NOTE(review): the cookie looks like a captured browser session (it contains
# skey / p_skey / lskey values) — confirm it is safe to publish and consider
# loading it from the environment instead of hard-coding it.
headers = {
    "cookie": 'RK=51FHFw4aE8; pgv_pvi=8430643200; '
              'ptcz=83cfc479ce75c5a1416df7d87136166109888f38587d9944738abca7ab77d17c; '
              'tvfe_boss_uuid=e4ba183f02ae980f; pgv_pvid=3169027098; pgv_pvid_new=2426636288_14882e87533; '
              'mobileUV=1_15f666e2b04_e8a50; pac_uid=1_1278077260; eas_sid=l1C5q306s9W2d845F9u7f1K1U6; '
              'ptui_loginuin=40370953; o_cookie=1278077260; luin=o1278077260; '
              'sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%221669eddcdc5156-0905303c6ff588-7d113749-1049088'
              '-1669eddcdc83f8%22%2C%22%24device_id%22%3A%221669eddcdc5156-0905303c6ff588-7d113749-1049088'
              '-1669eddcdc83f8%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6'
              '%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22'
              '%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5'
              '%BC%80%22%7D%7D; '
              'lskey=00010000a5727043706a88a2aebf6044daf687035fcc0804760fd13cac0729275356f7aa88d5157b46210ea6; '
              'LW_sid=y1s5J425D4j7u9N1Q8Q0j2k383; LW_uid=p1q5u4d584A7f971l820z2k3M9; ts_uid=4705118039; yq_index=0; '
              'uin=o1278077260; skey=@mXN9mj3as; p_uin=o1278077260; '
              'pt4_token=cVwioR9KifEllUyD2CPEXz692iNhDH8JE-YwH*5TlRY_; '
              'p_skey=BE7HSxnTeFIPwrO6sJ*YXyA1xKGxT072f5YAo919LSY_; yqq_stat=0; pgv_si=s3828307968; '
              'pgv_info=ssid=s3773836208; ts_last=y.qq.com/n/yqq/toplist/4.html; '
              'ts_refer=link.zhihu.com/%3Ftarget%3Dhttps%253A//y.qq.com/n/yqq/toplist/4.html%2523stat%253Dy_new'
              '.toplist.menu.4',
    "user-agent": 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3493.3 '
                  'Safari/537.36 ',
    'Referer': 'https://y.qq.com/n/yqq/playsquare/7195631775.html'
}

def _get_json(url, **kwargs):
    """Fetch ``url`` with ``requests.get`` and return the decoded JSON body."""
    return json.loads(requests.get(url=url, **kwargs).text)


def func(num):
    """Store the stream URLs of the songs found under playlist category ``num``.

    The function lists the playlists of the category (the request asks for
    ``sin=0&ein=19`` — presumably the first 20 entries, TODO confirm), reads
    the song list of each playlist, resolves every songmid to its ``purl`` and
    upserts ``(songmid, name, url)`` into the QQMUSIC table through the
    module-level ``cursor`` / ``db``.

    Args:
        num: value substituted into the ``categoryId`` query parameter.

    Returns:
        None. Rows are committed one by one, so an interrupted run keeps
        everything stored so far.

    Raises:
        Network, JSON-decoding, missing-key and database errors are not
        caught here and propagate to the caller.
    """
    print(num)
    dissurl = ("https://c.y.qq.com/splcloud/fcgi-bin/fcg_get_diss_by_tag.fcg?picmid=1&g_tk=5381&loginUin=0&hostUin=0"
               "&format=json&inCharset=utf8&outCharset=utf-8&notice=0&platform=yqq.json&needNewCode=0&categoryId={}"
               "&sortId=5&sin=0&ein=19").format(num)
    playlists = _get_json(dissurl, headers=headers)["data"]["list"]  # playlists of this category
    if not playlists:
        return

    # The table name matches the case used by the CREATE TABLE statement kept
    # at the top of the script; MySQL table names are case-sensitive on most
    # Unix installations.
    sql = "replace into QQMUSIC (songmid,name,url) values(%s,%s,%s)"

    for k in playlists:
        dissid = k["dissid"]

        # No trailing blank after needNewCode=0: it would be sent as part of
        # the parameter value.
        starturl = ('https://c.y.qq.com/qzone/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?type=1&json=1&utf8=1&onlysong'
                    '=0&new_format=1&disstid={}&g_tk=5381&loginUin=0&hostUin=0&format=json&inCharset=utf8'
                    '&outCharset=utf-8&notice=0&platform=yqq.json&needNewCode=0').format(dissid)
        cdlist = _get_json(starturl, headers=headers)["cdlist"]  # playlist detail
        if not cdlist:
            # Nothing returned for this playlist; move on to the next one.
            continue

        # Map songmid -> song title. A mid that occurs twice keeps its last title.
        mid_name = {i["mid"]: i["name"] for i in cdlist[0]["songlist"]}
        for j in mid_name:
            vkey_url = ("https://u.y.qq.com/cgi-bin/musicu.fcg?-=getplaysongvkey00019064020164671902&g_tk=5381&loginUin=0"
                        "&hostUin=0&format=json&inCharset=utf8&outCharset=utf-8&notice=0&platform=yqq.json&needNewCode=0&data"
                        "=%7B%22req_0%22%3A%7B%22module%22%3A%22vkey.GetVkeyServer%22%2C%22method%22%3A%22CgiGetVkey%22%2C"
                        "%22param%22%3A%7B%22guid%22%3A%226611369584%22%2C%22songmid%22%3A%5B%22{}%22%5D%2C"
                        "%22songtype%22%3A%5B0%5D%2C%22uin%22%3A%220%22%2C%22loginflag%22%3A1%2C%22platform%22%3A%2220%22%7D"
                        "%7D%2C%22comm%22%3A%7B%22uin%22%3A0%2C%22format%22%3A%22json%22%2C%22ct%22%3A24%2C%22cv%22%3A0%7D%7D"
                        ).format(j)
            # This request is sent without the shared headers, as before.
            midurlinfo = _get_json(vkey_url)["req_0"]["data"]["midurlinfo"]
            vkey = midurlinfo[0]["purl"] if midurlinfo else ""
            if not vkey:
                # No stream path for this song, so there is no URL to store.
                continue

            url = "http://dl.stream.qqmusic.qq.com/" + vkey
            cursor.execute(sql, [j, mid_name[j], url])
            db.commit()

# Crawl category ids 1..226. The cursor and the connection are released even
# when one of the crawl steps raises, so a failed run does not leave the
# connection open.
try:
    for lon in range(1, 227):
        func(lon)
finally:
    cursor.close()
    db.close()


参考网址:https://www.jianshu.com/p/d347bd732bbc


转载:https://blog.csdn.net/weixin_43601907/article/details/102055836
查看评论
* 以上用户言论只代表其个人观点,不代表本网站的观点或立场