  • 发现短链接被还原成了原始链接
  • 暂时我们还不知道下载视频需要哪些参数,后期通过分析得知只需要sec_uid这个参数即可
  • pc端打开会出现样式问题,我们选择移动端打开,模拟手机环境
  • 因此,爬虫也需要模拟移动端环境进行请求



  • 点击作品和喜欢,可以抓包到请求的链接,这个我们稍后分析
  • 因为现在抖音app有隐藏自己收藏的功能,所以如果用户设置了隐藏,你将无法下载用户收藏的视频哦


  • 多次刷新后的结果如下



  • status_code: 不用说,肯定是返回状态
  • aweme_list: 存放视频信息的数组,等会具体分析
  • max_cursor/min_cursor: 这个盲猜都知道用来分页的指针,如果是多页,且请求的不是第一页,需要传其中的某个值,这个暂时不讨论
  • has_more: 是否有多页
  • extra: 额外的信息,当前请求的毫秒级时间戳,以及logid,这个不重要,抖音那边用来日志记录的


  • 可以看出每个视频有两个链接,自己访问一下就知道,一个链接是用户上传的原视频,另一个是抖音那边加了水印的视频
  • 其实到这里大家都知道无水印视频如何下载了,不必赘述了


  • 其实通过分析,抖音下载只需要拿到sec_uid这个参数即可
  • 浏览器打开分享的短链接,就能在地址栏看到这个参数

  1. #!/usr/bin/env python
  2. # encoding: utf-8
  3. '''
  4. #-------------------------------------------------------------------
  6. #-------------------------------------------------------------------
  7. #
  8. # @Project Name : 抖音下载小助手
  9. #
  10. # @File Name : main.py
  11. #
  12. # @Programmer : Felix
  13. #
  14. # @Start Date : 2020/7/30 14:42
  15. #
  16. # @Last Update : 2020/7/30 14:42
  17. #
  18. #-------------------------------------------------------------------
  19. '''
  20. import os, sys, requests
  21. import json, re, time
  22. from retrying import retry
  23. from contextlib import closing
  class DouYin:
      '''
      Command-line helper that downloads the videos of one Douyin user.

      Given a user's ``sec_uid`` it asks the iesdouyin.com web API for the
      user's posted or liked videos and saves every entry as an ``.mp4`` file,
      with or without the watermark.
      '''

      # Characters that must not appear in a file or directory name. The
      # backslash has to be escaped twice to actually be part of the class.
      _ILLEGAL_NAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n]')

      def __init__(self):
          '''
          Prepare the HTTP headers sent with every request.

          The User-Agent is an iPhone Safari one, so requests look like they
          come from a mobile browser.
          '''
          self.headers = {
              'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
              'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
              'pragma': 'no-cache',
              'cache-control': 'no-cache',
              'upgrade-insecure-requests': '1',
              'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1',
          }

      def hello(self):
          '''
          Print the welcome banner.

          :return: self, so that calls can be chained
          '''
          print("*" * 50)
          print(' ' * 15 + '抖音下载小助手')
          print(' ' * 5 + '作者: Felix Date: 2020-05-20 13:14')
          print(' ' * 15 + '无水印 | 有水印')
          print(' ' * 12 + '输入用户的sec_uid即可')
          print(' ' * 2 + '用浏览器打开用户分享链接,复制参数中sec_uid')
          print("*" * 50)
          return self

      @classmethod
      def _sanitize_name(cls, name):
          '''
          Strip characters that are illegal in file names.

          :param name: raw text (may be None or empty)
          :return: the cleaned text, '' when nothing usable is left
          '''
          if not name:
              return ''
          return cls._ILLEGAL_NAME_CHARS.sub('', name).strip()

      @staticmethod
      def _unique_name(name, used):
          '''
          Return ``name``, suffixed with ``_2``, ``_3``, ... if already taken.

          :param name: the wanted name
          :param used: set of names handed out so far; updated in place
          :return: a name that is not in ``used``
          '''
          candidate = name
          suffix = 1
          while candidate in used:
              suffix += 1
              candidate = '%s_%d' % (name, suffix)
          used.add(candidate)
          return candidate

      @staticmethod
      def _ensure_ok(response):
          '''
          Raise unless the response status is exactly 200.

          An explicit raise is used instead of ``assert`` because asserts are
          removed when Python runs with ``-O``.

          :param response: a requests response
          :return: the same response
          :raises requests.HTTPError: if the status code is not 200
          '''
          if response.status_code != 200:
              raise requests.HTTPError(
                  'unexpected HTTP status %d for %s' % (response.status_code, response.url),
                  response=response,
              )
          return response

      def get_video_urls(self, sec_uid, type_flag='p', max_attempts=100):
          '''
          Get the video links of a user.

          The API frequently answers with an empty ``aweme_list``, so the same
          request is repeated until it returns data or ``max_attempts`` is
          reached.

          :param sec_uid: the sec_uid of the user
          :param type_flag: 'p' for posted videos, anything else for liked ones
          :param max_attempts: upper bound on the number of requests;
              None means retry without limit
          :return: nickname, video_list -- nickname is None when it could not be
              determined; video_list holds dicts with the keys 'desc' (a unique,
              file-system safe name) and 'url'
          '''
          if type_flag == 'p':
              api_url = 'https://www.iesdouyin.com/web/api/v2/aweme/post/'
          else:
              api_url = 'https://www.iesdouyin.com/web/api/v2/aweme/like/'
          query = {'sec_uid': sec_uid, 'count': 2000}

          print('---解析视频链接中...\r')
          aweme_list = []
          attempt = 0
          while not aweme_list and (max_attempts is None or attempt < max_attempts):
              attempt += 1
              print('---正在第 {} 次尝试...\r'.format(attempt))
              response = self.get_request(api_url, params=query)
              payload = json.loads(response.content.decode())
              aweme_list = payload.get('aweme_list') or []

          nickname = None
          video_list = []
          used_names = set()
          for item in aweme_list:
              if nickname is None:
                  # Keep the cleaned value: it becomes a directory name.
                  nickname = self._sanitize_name(item['author']['nickname']) or None
              # Fall back to a generated title when nothing usable remains.
              desc = self._sanitize_name(item['desc']) or '无标题' + str(int(time.time()))
              video_list.append({
                  # Two videos with the same title must not share a file.
                  'desc': self._unique_name(desc, used_names),
                  'url': item['video']['play_addr']['url_list'][0],
              })
          return nickname, video_list

      def get_download_url(self, video_url, watermark_flag):
          '''
          Pick the host that serves the wanted variant of the video.

          :param video_url: the url of video
          :param watermark_flag: truthy for the watermarked video
          :return: the url to download from
          '''
          if watermark_flag:
              return video_url.replace('api.amemv.com', 'aweme.snssdk.com')
          return video_url.replace('aweme.snssdk.com', 'api.amemv.com')

      def video_downloader(self, video_url, video_name, watermark_flag=False):
          '''
          Download the video to ``video_name + '.mp4'``.

          :param video_url: the url of video
          :param video_name: target path without the '.mp4' extension
          :param watermark_flag: truthy to download the watermarked video
          :return: None
          '''
          chunk_size = 64 * 1024
          target_path = video_name + '.mp4'
          # Write to a temporary name first so that an interrupted download is
          # never mistaken for a finished file on the next run.
          part_path = target_path + '.part'
          video_url = self.get_download_url(video_url, watermark_flag=watermark_flag)
          with closing(requests.get(video_url, headers=self.headers, stream=True, timeout=10)) as response:
              if response.status_code != 200:
                  sys.stdout.write('----[下载失败]:HTTP %d\n' % response.status_code)
                  return
              # The header can be missing; then only the byte count is shown.
              content_size = int(response.headers.get('content-length') or 0)
              if content_size:
                  sys.stdout.write('----[文件大小]:%0.2f MB\n' % (content_size / 1024 / 1024))
              size = 0
              with open(part_path, 'wb') as file:
                  for data in response.iter_content(chunk_size=chunk_size):
                      file.write(data)
                      size += len(data)
                      if content_size:
                          sys.stdout.write('----[下载进度]:%.2f%%' % float(size / content_size * 100) + '\r')
                      else:
                          sys.stdout.write('----[已下载]:%0.2f MB' % (size / 1024 / 1024) + '\r')
                      sys.stdout.flush()
          os.replace(part_path, target_path)

      @retry(stop_max_attempt_number=3)
      def get_request(self, url, params=None):
          '''
          Send a get request; retried by the decorator, 3 attempts at most.

          :param url: the url of request
          :param params: the params of request
          :return: the result of request
          :raises requests.HTTPError: if the status code is not 200
          '''
          if params is None:
              params = {}
          response = requests.get(url, params=params, headers=self.headers, timeout=10)
          return self._ensure_ok(response)

      @retry(stop_max_attempt_number=3)
      def post_request(self, url, data=None):
          '''
          Send a post request; retried by the decorator, 3 attempts at most.

          :param url: the url of request
          :param data: the params of request
          :return: the result of request
          :raises requests.HTTPError: if the status code is not 200
          '''
          if data is None:
              data = {}
          response = requests.post(url, data=data, headers=self.headers, timeout=10)
          return self._ensure_ok(response)

      def run(self):
          '''
          Program entry: ask for the options, then download every video.
          '''
          sec_uid = input('请输入用户sec_uid:').strip()
          sec_uid = sec_uid if sec_uid else 'MS4wLjABAAAAle_oORaZCgYlB84cLTKSqRFvDgGmgrJsS6n3TfwxonM'

          answer = input('是否下载带水印的视频 (0-否(默认), 1-是):').strip()
          try:
              watermark_flag = bool(int(answer)) if answer else False
          except ValueError:
              # Anything that is not a number means "use the default".
              watermark_flag = False

          type_flag = input('p-上传的(默认), l-收藏的:').strip() or 'p'
          save_dir = input('保存路径 (默认"./Download/"):').strip() or "./Download/"

          nickname, video_list = self.get_video_urls(sec_uid, type_flag)
          if not video_list:
              print('---未获取到任何视频,请检查sec_uid或稍后重试...\r')
              return

          # The nickname can be empty after cleaning; sec_uid is always set.
          target_dir = os.path.join(save_dir, nickname or sec_uid)
          if type_flag != 'p':
              # Same rule as get_video_urls: everything but 'p' is "liked".
              target_dir = os.path.join(target_dir, 'favorite')
          os.makedirs(target_dir, exist_ok=True)

          print('---视频下载中: 共有%d个作品...\r' % len(video_list))
          for num, video in enumerate(video_list, start=1):
              print('---正在解析第%d个视频链接 [%s] 中,请稍后...\n' % (num, video['desc']))
              video_path = os.path.join(target_dir, video['desc'])
              # The downloader appends '.mp4', so test for that exact file.
              if os.path.isfile(video_path + '.mp4'):
                  print('---视频已存在...\r')
              else:
                  self.video_downloader(video['url'], video_path, watermark_flag)
              print('\n')
          print('---下载完成...\r')
  if __name__ == "__main__":
      # Script entry point: show the banner, then start the interactive
      # download session on the same instance.
      app = DouYin()
      app.hello()
      app.run()


  • 其中重试的次数,就是连续请求那个数据链接、直到list中有数据为止所尝试的次数

  • 可以发现已经可以正常下载了





