You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

297 lines
9.9 KiB

9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import hashlib
  4. import math
  5. import os.path
  6. import random
  7. import re
  8. import time
  9. import uuid
  10. import zlib
  11. from .common import InfoExtractor
  12. from ..compat import compat_urllib_parse
  13. from ..utils import (
  14. ExtractorError,
  15. url_basename,
  16. )
  17. class IqiyiIE(InfoExtractor):
  18. IE_NAME = 'iqiyi'
  19. IE_DESC = '爱奇艺'
  20. _VALID_URL = r'http://(?:www\.)iqiyi.com/v_.+?\.html'
  21. _TESTS = [{
  22. 'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
  23. 'md5': '2cb594dc2781e6c941a110d8f358118b',
  24. 'info_dict': {
  25. 'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
  26. 'title': '美国德州空中惊现奇异云团 酷似UFO',
  27. 'ext': 'f4v',
  28. }
  29. }, {
  30. 'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
  31. 'info_dict': {
  32. 'id': 'e3f585b550a280af23c98b6cb2be19fb',
  33. 'title': '名侦探柯南第752集',
  34. },
  35. 'playlist': [{
  36. 'md5': '7e49376fecaffa115d951634917fe105',
  37. 'info_dict': {
  38. 'id': 'e3f585b550a280af23c98b6cb2be19fb_part1',
  39. 'ext': 'f4v',
  40. 'title': '名侦探柯南第752集',
  41. },
  42. }, {
  43. 'md5': '41b75ba13bb7ac0e411131f92bc4f6ca',
  44. 'info_dict': {
  45. 'id': 'e3f585b550a280af23c98b6cb2be19fb_part2',
  46. 'ext': 'f4v',
  47. 'title': '名侦探柯南第752集',
  48. },
  49. }, {
  50. 'md5': '0cee1dd0a3d46a83e71e2badeae2aab0',
  51. 'info_dict': {
  52. 'id': 'e3f585b550a280af23c98b6cb2be19fb_part3',
  53. 'ext': 'f4v',
  54. 'title': '名侦探柯南第752集',
  55. },
  56. }, {
  57. 'md5': '4f8ad72373b0c491b582e7c196b0b1f9',
  58. 'info_dict': {
  59. 'id': 'e3f585b550a280af23c98b6cb2be19fb_part4',
  60. 'ext': 'f4v',
  61. 'title': '名侦探柯南第752集',
  62. },
  63. }, {
  64. 'md5': 'd89ad028bcfad282918e8098e811711d',
  65. 'info_dict': {
  66. 'id': 'e3f585b550a280af23c98b6cb2be19fb_part5',
  67. 'ext': 'f4v',
  68. 'title': '名侦探柯南第752集',
  69. },
  70. }, {
  71. 'md5': '9cb1e5c95da25dff0660c32ae50903b7',
  72. 'info_dict': {
  73. 'id': 'e3f585b550a280af23c98b6cb2be19fb_part6',
  74. 'ext': 'f4v',
  75. 'title': '名侦探柯南第752集',
  76. },
  77. }, {
  78. 'md5': '155116e0ff1867bbc9b98df294faabc9',
  79. 'info_dict': {
  80. 'id': 'e3f585b550a280af23c98b6cb2be19fb_part7',
  81. 'ext': 'f4v',
  82. 'title': '名侦探柯南第752集',
  83. },
  84. }, {
  85. 'md5': '53f5db77622ae14fa493ed2a278a082b',
  86. 'info_dict': {
  87. 'id': 'e3f585b550a280af23c98b6cb2be19fb_part8',
  88. 'ext': 'f4v',
  89. 'title': '名侦探柯南第752集',
  90. },
  91. }],
  92. }]
  93. _FORMATS_MAP = [
  94. ('1', 'h6'),
  95. ('2', 'h5'),
  96. ('3', 'h4'),
  97. ('4', 'h3'),
  98. ('5', 'h2'),
  99. ('10', 'h1'),
  100. ]
  101. def construct_video_urls(self, data, video_id, _uuid):
  102. def do_xor(x, y):
  103. a = y % 3
  104. if a == 1:
  105. return x ^ 121
  106. if a == 2:
  107. return x ^ 72
  108. return x ^ 103
  109. def get_encode_code(l):
  110. a = 0
  111. b = l.split('-')
  112. c = len(b)
  113. s = ''
  114. for i in range(c - 1, -1, -1):
  115. a = do_xor(int(b[c - i - 1], 16), i)
  116. s += chr(a)
  117. return s[::-1]
  118. def get_path_key(x, format_id, segment_index):
  119. mg = ')(*&^flash@#$%a'
  120. tm = self._download_json(
  121. 'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id,
  122. note='Download path key of segment %d for format %s' % (segment_index + 1, format_id)
  123. )['t']
  124. t = str(int(math.floor(int(tm) / (600.0))))
  125. return hashlib.md5((t + mg + x).encode('utf8')).hexdigest()
  126. video_urls_dict = {}
  127. for format_item in data['vp']['tkl'][0]['vs']:
  128. if 0 < int(format_item['bid']) <= 10:
  129. format_id = self.get_format(format_item['bid'])
  130. else:
  131. continue
  132. video_urls = []
  133. video_urls_info = format_item['fs']
  134. if not format_item['fs'][0]['l'].startswith('/'):
  135. t = get_encode_code(format_item['fs'][0]['l'])
  136. if t.endswith('mp4'):
  137. video_urls_info = format_item['flvs']
  138. for segment_index, segment in enumerate(video_urls_info):
  139. vl = segment['l']
  140. if not vl.startswith('/'):
  141. vl = get_encode_code(vl)
  142. key = get_path_key(
  143. vl.split('/')[-1].split('.')[0], format_id, segment_index)
  144. filesize = segment['b']
  145. base_url = data['vp']['du'].split('/')
  146. base_url.insert(-1, key)
  147. base_url = '/'.join(base_url)
  148. param = {
  149. 'su': _uuid,
  150. 'qyid': uuid.uuid4().hex,
  151. 'client': '',
  152. 'z': '',
  153. 'bt': '',
  154. 'ct': '',
  155. 'tn': str(int(time.time()))
  156. }
  157. api_video_url = base_url + vl + '?' + \
  158. compat_urllib_parse.urlencode(param)
  159. js = self._download_json(
  160. api_video_url, video_id,
  161. note='Download video info of segment %d for format %s' % (segment_index + 1, format_id))
  162. video_url = js['l']
  163. video_urls.append(
  164. (video_url, filesize))
  165. video_urls_dict[format_id] = video_urls
  166. return video_urls_dict
  167. def get_format(self, bid):
  168. matched_format_ids = [_format_id for _bid, _format_id in self._FORMATS_MAP if _bid == str(bid)]
  169. return matched_format_ids[0] if len(matched_format_ids) else None
  170. def get_bid(self, format_id):
  171. matched_bids = [_bid for _bid, _format_id in self._FORMATS_MAP if _format_id == format_id]
  172. return matched_bids[0] if len(matched_bids) else None
  173. def get_raw_data(self, tvid, video_id, enc_key, _uuid):
  174. tm = str(int(time.time()))
  175. param = {
  176. 'key': 'fvip',
  177. 'src': hashlib.md5(b'youtube-dl').hexdigest(),
  178. 'tvId': tvid,
  179. 'vid': video_id,
  180. 'vinfo': 1,
  181. 'tm': tm,
  182. 'enc': hashlib.md5(
  183. (enc_key + tm + tvid).encode('utf8')).hexdigest(),
  184. 'qyid': _uuid,
  185. 'tn': random.random(),
  186. 'um': 0,
  187. 'authkey': hashlib.md5(
  188. (tm + tvid).encode('utf8')).hexdigest()
  189. }
  190. api_url = 'http://cache.video.qiyi.com/vms' + '?' + \
  191. compat_urllib_parse.urlencode(param)
  192. raw_data = self._download_json(api_url, video_id)
  193. return raw_data
  194. def get_enc_key(self, swf_url, video_id):
  195. filename, _ = os.path.splitext(url_basename(swf_url))
  196. enc_key_json = self._downloader.cache.load('iqiyi-enc-key', filename)
  197. if enc_key_json is not None:
  198. return enc_key_json[0]
  199. req = self._request_webpage(
  200. swf_url, video_id, note='download swf content')
  201. cn = req.read()
  202. cn = zlib.decompress(cn[8:])
  203. pt = re.compile(b'MixerRemote\x08(?P<enc_key>.+?)\$&vv')
  204. enc_key = self._search_regex(pt, cn, 'enc_key').decode('utf8')
  205. self._downloader.cache.store('iqiyi-enc-key', filename, [enc_key])
  206. return enc_key
  207. def _real_extract(self, url):
  208. webpage = self._download_webpage(
  209. url, 'temp_id', note='download video page')
  210. tvid = self._search_regex(
  211. r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid')
  212. video_id = self._search_regex(
  213. r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')
  214. swf_url = self._search_regex(
  215. r'(http://[^\'"]+MainPlayer[^.]+\.swf)', webpage, 'swf player URL')
  216. _uuid = uuid.uuid4().hex
  217. enc_key = self.get_enc_key(swf_url, video_id)
  218. raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid)
  219. if raw_data['code'] != 'A000000':
  220. raise ExtractorError('Unable to load data. Error code: ' + raw_data['code'])
  221. if not raw_data['data']['vp']['tkl']:
  222. raise ExtractorError('No support iQiqy VIP video')
  223. data = raw_data['data']
  224. title = data['vi']['vn']
  225. # generate video_urls_dict
  226. video_urls_dict = self.construct_video_urls(
  227. data, video_id, _uuid)
  228. # construct info
  229. entries = []
  230. for format_id in video_urls_dict:
  231. video_urls = video_urls_dict[format_id]
  232. for i, video_url_info in enumerate(video_urls):
  233. if len(entries) < i + 1:
  234. entries.append({'formats': []})
  235. entries[i]['formats'].append(
  236. {
  237. 'url': video_url_info[0],
  238. 'filesize': video_url_info[-1],
  239. 'format_id': format_id,
  240. 'preference': int(self.get_bid(format_id))
  241. }
  242. )
  243. for i in range(len(entries)):
  244. self._sort_formats(entries[i]['formats'])
  245. entries[i].update(
  246. {
  247. 'id': '%s_part%d' % (video_id, i + 1),
  248. 'title': title,
  249. }
  250. )
  251. if len(entries) > 1:
  252. info = {
  253. '_type': 'multi_video',
  254. 'id': video_id,
  255. 'title': title,
  256. 'entries': entries,
  257. }
  258. else:
  259. info = entries[0]
  260. info['id'] = video_id
  261. info['title'] = title
  262. return info