# coding: utf-8
from __future__ import unicode_literals

import hashlib
import json
import math
import random
import re
import time
import uuid
import zlib

from .common import InfoExtractor
from ..compat import (
    compat_chr,
    compat_parse_qs,
    compat_str,
    compat_urllib_parse,
    compat_urllib_request,
    compat_urlparse,
)
from ..utils import ExtractorError
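
# Extractor for video pages on iqiyi.com. Overall flow: scrape the tvid,
# video id and player SWF URL from the page, recover the API signing key
# from the SWF, query the VMS API for stream metadata, then build the
# final segment URLs one request at a time.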
class IqiyiIE(InfoExtractor):
    IE_NAME = 'iqiyi'

    _VALID_URL = r'http://(?:www\.)?iqiyi\.com/.+?\.html'

    _TEST = {
        'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
        'md5': '260f0f59686e65e886995d0ba791ab83',
        'info_dict': {
            'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
            'title': '美国德州空中惊现奇异云团 酷似UFO',
            'ext': 'f4v',
        }
    }
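
    # Build a {format_id: [(url, filesize), ...]} mapping. Each stream listed
    # in the VMS response is made of segments; segment paths may be obfuscated
    # with a per-byte XOR scheme, and every segment request is signed with a
    # time-based MD5 path key.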
    def construct_video_urls(self, data, video_id, _uuid):
        def do_xor(x, y):
            a = y % 3
            if a == 1:
                return x ^ 121
            if a == 2:
                return x ^ 72
            return x ^ 103
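
        # Decode an obfuscated segment path: each '-'-separated hex byte is
        # XORed with a position-dependent constant and the result reversed.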
        def get_encode_code(l):
            a = 0
            b = l.split('-')
            c = len(b)
            s = ''
            for i in range(c - 1, -1, -1):
                a = do_xor(int(b[c - i - 1], 16), i)
                s += chr(a)
            return s[::-1]
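
        # Sign a segment file name: MD5 over the server time (rounded down to
        # 10-minute buckets), a static salt and the file name stem.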
        def get_path_key(x):
            mg = ')(*&^flash@#$%a'
            tm = self._download_json(
                'http://data.video.qiyi.com/t?tn=' + str(random.random()),
                video_id)['t']
            t = str(int(math.floor(int(tm) / 600.0)))
            return hashlib.md5((t + mg + x).encode('utf8')).hexdigest()
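
        # Walk every stream ('vs' entry) and every segment it contains,
        # asking the API for the real download URL of each segment.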
        video_urls_dict = {}
        for i in data['vp']['tkl'][0]['vs']:
            if not 0 < int(i['bid']) <= 10:
                # skip bitrate ids we cannot map to a format name
                continue
            format_id = self.get_format(i['bid'])

            video_urls_info = i['fs']
            if not i['fs'][0]['l'].startswith('/'):
                t = get_encode_code(i['fs'][0]['l'])
                if t.endswith('mp4'):
                    video_urls_info = i['flvs']

            video_urls = []
            for ii in video_urls_info:
                vl = ii['l']
                if not vl.startswith('/'):
                    vl = get_encode_code(vl)
                key = get_path_key(vl.split('/')[-1].split('.')[0])
                filesize = ii['b']

                base_url = data['vp']['du'].split('/')
                base_url.insert(-1, key)
                base_url = '/'.join(base_url)
                param = {
                    'su': _uuid,
                    'qyid': uuid.uuid4().hex,
                    'client': '',
                    'z': '',
                    'bt': '',
                    'ct': '',
                    'tn': str(int(time.time()))
                }
                api_video_url = base_url + vl + '?' + \
                    compat_urllib_parse.urlencode(param)
                js = self._download_json(api_video_url, video_id)
                video_url = js['l']
                video_urls.append((video_url, filesize))

            video_urls_dict[format_id] = video_urls
        return video_urls_dict
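
    # Map iqiyi bitrate ids ('bid') to human-readable format names.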
    def get_format(self, bid):
        bid_dict = {
            '1': 'standard',
            '2': 'high',
            '3': 'super',
            '4': 'super-high',
            '5': 'fullhd',
            '10': '4k',
        }
        return bid_dict[str(bid)]
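
    # Query the VMS API for raw stream metadata. The request is signed with
    # MD5 digests built from the enc_key recovered from the player SWF, the
    # current timestamp and the tvid.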
    def get_raw_data(self, tvid, video_id, enc_key, _uuid):
        tm = str(int(time.time()))
        param = {
            'key': 'fvip',
            'src': hashlib.md5(b'youtube-dl').hexdigest(),
            'tvId': tvid,
            'vid': video_id,
            'vinfo': 1,
            'tm': tm,
            'enc': hashlib.md5((enc_key + tm + tvid).encode('utf8')).hexdigest(),
            'qyid': _uuid,
            'tn': random.random(),
            'um': 0,
            'authkey': hashlib.md5((tm + tvid).encode('utf8')).hexdigest(),
        }
        api_url = 'http://cache.video.qiyi.com/vms' + '?' + \
            compat_urllib_parse.urlencode(param)
        raw_data = self._download_json(api_url, video_id)
        return raw_data
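
    # The player SWF is a zlib-compressed (CWS) flash file; skipping the
    # 8-byte header and inflating the rest exposes the enc_key between the
    # 'MixerRemote' marker and '$&vv'.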
    def get_enc_key(self, swf_url, video_id):
        req = self._request_webpage(
            swf_url, video_id, note='download swf content')
        cn = req.read()
        cn = zlib.decompress(cn[8:])
        pt = re.compile(b'MixerRemote\x08(?P<enc_key>.+?)\\$&vv')
        enc_key = self._search_regex(pt, cn, 'enc_key').decode('utf8')
        return enc_key
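
    # Entry point: scrape the tvid, video id and player SWF URL from the
    # video page, fetch the stream metadata and return either a single video
    # or a multi_video playlist when the video is split into parts.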
    def _real_extract(self, url):
        webpage = self._download_webpage(
            url, 'temp_id', note='download video page')
        tvid = self._search_regex(
            r'tvId ?= ?(\'|\")(?P<tvid>\d+)', webpage, 'tvid',
            flags=re.I, group='tvid')
        video_id = self._search_regex(
            r'videoId ?= ?(\'|\")(?P<video_id>[a-z\d]+)',
            webpage, 'video_id', flags=re.I, group='video_id')
        swf_url = self._search_regex(
            r'(?P<swf>http://.+?MainPlayer.+?\.swf)', webpage, 'swf')
        _uuid = uuid.uuid4().hex

        enc_key = self.get_enc_key(swf_url, video_id)

        raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid)
        assert raw_data['code'] == 'A000000'
        if not raw_data['data']['vp']['tkl']:
            raise ExtractorError('iQiyi VIP videos are not supported')

        data = raw_data['data']

        title = data['vi']['vn']

        # generate video_urls_dict
        video_urls_dict = self.construct_video_urls(data, video_id, _uuid)

        # construct info
        entries = []
        for format_id in video_urls_dict:
            video_urls = video_urls_dict[format_id]
            for i, video_url_info in enumerate(video_urls):
                if len(entries) < i + 1:
                    entries.append({'formats': []})
                entries[i]['formats'].append({
                    'url': video_url_info[0],
                    'filesize': video_url_info[-1],
                    'format_id': format_id,
                })

        for i in range(len(entries)):
            entries[i].update({
                'id': '_part%d' % (i + 1),
                'title': title,
            })

        if len(entries) > 1:
            info = {
                '_type': 'multi_video',
                'id': video_id,
                'title': title,
                'entries': entries,
            }
        else:
            info = entries[0]
            info['id'] = video_id
            info['title'] = title

        return info