You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

223 lines
6.8 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import hashlib
  4. import math
  5. import random
  6. import re
  7. import time
  8. import uuid
  9. import zlib
  10. from .common import InfoExtractor
  11. from ..compat import compat_urllib_parse
  12. from ..utils import ExtractorError
  13. class IqiyiIE(InfoExtractor):
  14. IE_NAME = 'iqiyi'
  15. _VALID_URL = r'http://(?:www\.)iqiyi.com/.+?\.html'
  16. _TEST = {
  17. 'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
  18. 'md5': '2cb594dc2781e6c941a110d8f358118b',
  19. 'info_dict': {
  20. 'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
  21. 'title': '美国德州空中惊现奇异云团 酷似UFO',
  22. 'ext': 'f4v',
  23. }
  24. }
  25. def construct_video_urls(self, data, video_id, _uuid):
  26. def do_xor(x, y):
  27. a = y % 3
  28. if a == 1:
  29. return x ^ 121
  30. if a == 2:
  31. return x ^ 72
  32. return x ^ 103
  33. def get_encode_code(l):
  34. a = 0
  35. b = l.split('-')
  36. c = len(b)
  37. s = ''
  38. for i in range(c - 1, -1, -1):
  39. a = do_xor(int(b[c - i - 1], 16), i)
  40. s += chr(a)
  41. return s[::-1]
  42. def get_path_key(x):
  43. mg = ')(*&^flash@#$%a'
  44. tm = self._download_json(
  45. 'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id)['t']
  46. t = str(int(math.floor(int(tm) / (600.0))))
  47. return hashlib.md5((t + mg + x).encode('utf8')).hexdigest()
  48. video_urls_dict = {}
  49. for i in data['vp']['tkl'][0]['vs']:
  50. if 0 < int(i['bid']) <= 10:
  51. format_id = self.get_format(i['bid'])
  52. else:
  53. continue
  54. video_urls = []
  55. video_urls_info = i['fs']
  56. if not i['fs'][0]['l'].startswith('/'):
  57. t = get_encode_code(i['fs'][0]['l'])
  58. if t.endswith('mp4'):
  59. video_urls_info = i['flvs']
  60. for ii in video_urls_info:
  61. vl = ii['l']
  62. if not vl.startswith('/'):
  63. vl = get_encode_code(vl)
  64. key = get_path_key(
  65. vl.split('/')[-1].split('.')[0])
  66. filesize = ii['b']
  67. base_url = data['vp']['du'].split('/')
  68. base_url.insert(-1, key)
  69. base_url = '/'.join(base_url)
  70. param = {
  71. 'su': _uuid,
  72. 'qyid': uuid.uuid4().hex,
  73. 'client': '',
  74. 'z': '',
  75. 'bt': '',
  76. 'ct': '',
  77. 'tn': str(int(time.time()))
  78. }
  79. api_video_url = base_url + vl + '?' + \
  80. compat_urllib_parse.urlencode(param)
  81. js = self._download_json(api_video_url, video_id)
  82. video_url = js['l']
  83. video_urls.append(
  84. (video_url, filesize))
  85. video_urls_dict[format_id] = video_urls
  86. return video_urls_dict
  87. def get_format(self, bid):
  88. _dict = {
  89. '1': 'h6',
  90. '2': 'h5',
  91. '3': 'h4',
  92. '4': 'h3',
  93. '5': 'h2',
  94. '10': 'h1'
  95. }
  96. return _dict.get(str(bid), None)
  97. def get_bid(self, format_id):
  98. _dict = {
  99. 'h6': '1',
  100. 'h5': '2',
  101. 'h4': '3',
  102. 'h3': '4',
  103. 'h2': '5',
  104. 'h1': '10',
  105. 'best': 'best'
  106. }
  107. return _dict.get(format_id, None)
  108. def get_raw_data(self, tvid, video_id, enc_key, _uuid):
  109. tm = str(int(time.time()))
  110. param = {
  111. 'key': 'fvip',
  112. 'src': hashlib.md5(b'youtube-dl').hexdigest(),
  113. 'tvId': tvid,
  114. 'vid': video_id,
  115. 'vinfo': 1,
  116. 'tm': tm,
  117. 'enc': hashlib.md5(
  118. (enc_key + tm + tvid).encode('utf8')).hexdigest(),
  119. 'qyid': _uuid,
  120. 'tn': random.random(),
  121. 'um': 0,
  122. 'authkey': hashlib.md5(
  123. (tm + tvid).encode('utf8')).hexdigest()
  124. }
  125. api_url = 'http://cache.video.qiyi.com/vms' + '?' + \
  126. compat_urllib_parse.urlencode(param)
  127. raw_data = self._download_json(api_url, video_id)
  128. return raw_data
  129. def get_enc_key(self, swf_url, video_id):
  130. req = self._request_webpage(
  131. swf_url, video_id, note='download swf content')
  132. cn = req.read()
  133. cn = zlib.decompress(cn[8:])
  134. pt = re.compile(b'MixerRemote\x08(?P<enc_key>.+?)\$&vv')
  135. enc_key = self._search_regex(pt, cn, 'enc_key').decode('utf8')
  136. return enc_key
  137. def _real_extract(self, url):
  138. webpage = self._download_webpage(
  139. url, 'temp_id', note='download video page')
  140. tvid = self._search_regex(
  141. r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid')
  142. video_id = self._search_regex(
  143. r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')
  144. swf_url = self._search_regex(
  145. r'(http://.+?MainPlayer.+?\.swf)', webpage, 'swf player URL')
  146. _uuid = uuid.uuid4().hex
  147. enc_key = self.get_enc_key(swf_url, video_id)
  148. raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid)
  149. if raw_data['code'] != 'A000000':
  150. raise ExtractorError('Unable to load data. Error code: ' + raw_data['code'])
  151. if not raw_data['data']['vp']['tkl']:
  152. raise ExtractorError('No support iQiqy VIP video')
  153. data = raw_data['data']
  154. title = data['vi']['vn']
  155. # generate video_urls_dict
  156. video_urls_dict = self.construct_video_urls(
  157. data, video_id, _uuid)
  158. # construct info
  159. entries = []
  160. for format_id in video_urls_dict:
  161. video_urls = video_urls_dict[format_id]
  162. for i, video_url_info in enumerate(video_urls):
  163. if len(entries) < i + 1:
  164. entries.append({'formats': []})
  165. entries[i]['formats'].append(
  166. {
  167. 'url': video_url_info[0],
  168. 'filesize': video_url_info[-1],
  169. 'format_id': format_id,
  170. 'preference': int(self.get_bid(format_id))
  171. }
  172. )
  173. for i in range(len(entries)):
  174. self._sort_formats(entries[i]['formats'])
  175. entries[i].update(
  176. {
  177. 'id': '_part%d' % (i + 1),
  178. 'title': title,
  179. }
  180. )
  181. if len(entries) > 1:
  182. info = {
  183. '_type': 'multi_video',
  184. 'id': video_id,
  185. 'title': title,
  186. 'entries': entries,
  187. }
  188. else:
  189. info = entries[0]
  190. info['id'] = video_id
  191. info['title'] = title
  192. return info