You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

236 lines
7.5 KiB

9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import base64
  4. from .common import InfoExtractor
  5. from ..utils import ExtractorError
  6. from ..compat import (
  7. compat_urllib_parse,
  8. compat_ord,
  9. compat_urllib_request,
  10. )
  11. class YoukuIE(InfoExtractor):
  12. IE_NAME = 'youku'
  13. IE_DESC = '优酷'
  14. _VALID_URL = r'''(?x)
  15. (?:
  16. http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
  17. youku:)
  18. (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
  19. '''
  20. _TESTS = [{
  21. 'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
  22. 'md5': '5f3af4192eabacc4501508d54a8cabd7',
  23. 'info_dict': {
  24. 'id': 'XMTc1ODE5Njcy_part1',
  25. 'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
  26. 'ext': 'flv'
  27. }
  28. }, {
  29. 'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
  30. 'only_matching': True,
  31. }, {
  32. 'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html',
  33. 'info_dict': {
  34. 'id': 'XODgxNjg1Mzk2',
  35. 'title': '武媚娘传奇 85',
  36. },
  37. 'playlist_count': 11,
  38. }, {
  39. 'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
  40. 'info_dict': {
  41. 'id': 'XMTI1OTczNDM5Mg',
  42. 'title': '花千骨 04',
  43. },
  44. 'playlist_count': 13,
  45. 'skip': 'Available in China only',
  46. }]
  47. def construct_video_urls(self, data1, data2):
  48. # get sid, token
  49. def yk_t(s1, s2):
  50. ls = list(range(256))
  51. t = 0
  52. for i in range(256):
  53. t = (t + ls[i] + compat_ord(s1[i % len(s1)])) % 256
  54. ls[i], ls[t] = ls[t], ls[i]
  55. s = bytearray()
  56. x, y = 0, 0
  57. for i in range(len(s2)):
  58. y = (y + 1) % 256
  59. x = (x + ls[y]) % 256
  60. ls[x], ls[y] = ls[y], ls[x]
  61. s.append(compat_ord(s2[i]) ^ ls[(ls[x] + ls[y]) % 256])
  62. return bytes(s)
  63. sid, token = yk_t(
  64. b'becaf9be', base64.b64decode(data2['ep'].encode('ascii'))
  65. ).decode('ascii').split('_')
  66. # get oip
  67. oip = data2['ip']
  68. # get fileid
  69. string_ls = list(
  70. 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890')
  71. shuffled_string_ls = []
  72. seed = data1['seed']
  73. N = len(string_ls)
  74. for ii in range(N):
  75. seed = (seed * 0xd3 + 0x754f) % 0x10000
  76. idx = seed * len(string_ls) // 0x10000
  77. shuffled_string_ls.append(string_ls[idx])
  78. del string_ls[idx]
  79. fileid_dict = {}
  80. for format in data1['streamtypes']:
  81. streamfileid = [
  82. int(i) for i in data1['streamfileids'][format].strip('*').split('*')]
  83. fileid = ''.join(
  84. [shuffled_string_ls[i] for i in streamfileid])
  85. fileid_dict[format] = fileid[:8] + '%s' + fileid[10:]
  86. def get_fileid(format, n):
  87. fileid = fileid_dict[format] % hex(int(n))[2:].upper().zfill(2)
  88. return fileid
  89. # get ep
  90. def generate_ep(format, n):
  91. fileid = get_fileid(format, n)
  92. ep_t = yk_t(
  93. b'bf7e5f01',
  94. ('%s_%s_%s' % (sid, fileid, token)).encode('ascii')
  95. )
  96. ep = base64.b64encode(ep_t).decode('ascii')
  97. return ep
  98. # generate video_urls
  99. video_urls_dict = {}
  100. for format in data1['streamtypes']:
  101. video_urls = []
  102. for dt in data1['segs'][format]:
  103. n = str(int(dt['no']))
  104. param = {
  105. 'K': dt['k'],
  106. 'hd': self.get_hd(format),
  107. 'myp': 0,
  108. 'ts': dt['seconds'],
  109. 'ypp': 0,
  110. 'ctype': 12,
  111. 'ev': 1,
  112. 'token': token,
  113. 'oip': oip,
  114. 'ep': generate_ep(format, n)
  115. }
  116. video_url = \
  117. 'http://k.youku.com/player/getFlvPath/' + \
  118. 'sid/' + sid + \
  119. '_' + str(int(n) + 1).zfill(2) + \
  120. '/st/' + self.parse_ext_l(format) + \
  121. '/fileid/' + get_fileid(format, n) + '?' + \
  122. compat_urllib_parse.urlencode(param)
  123. video_urls.append(video_url)
  124. video_urls_dict[format] = video_urls
  125. return video_urls_dict
  126. def get_hd(self, fm):
  127. hd_id_dict = {
  128. 'flv': '0',
  129. 'mp4': '1',
  130. 'hd2': '2',
  131. 'hd3': '3',
  132. '3gp': '0',
  133. '3gphd': '1'
  134. }
  135. return hd_id_dict[fm]
  136. def parse_ext_l(self, fm):
  137. ext_dict = {
  138. 'flv': 'flv',
  139. 'mp4': 'mp4',
  140. 'hd2': 'flv',
  141. 'hd3': 'flv',
  142. '3gp': 'flv',
  143. '3gphd': 'mp4'
  144. }
  145. return ext_dict[fm]
  146. def get_format_name(self, fm):
  147. _dict = {
  148. '3gp': 'h6',
  149. '3gphd': 'h5',
  150. 'flv': 'h4',
  151. 'mp4': 'h3',
  152. 'hd2': 'h2',
  153. 'hd3': 'h1'
  154. }
  155. return _dict[fm]
  156. def _real_extract(self, url):
  157. video_id = self._match_id(url)
  158. def retrieve_data(req_url, note):
  159. req = compat_urllib_request.Request(req_url)
  160. cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
  161. if cn_verification_proxy:
  162. req.add_header('Ytdl-request-proxy', cn_verification_proxy)
  163. raw_data = self._download_json(req, video_id, note=note)
  164. return raw_data['data'][0]
  165. # request basic data
  166. data1 = retrieve_data(
  167. 'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id,
  168. 'Downloading JSON metadata 1')
  169. data2 = retrieve_data(
  170. 'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id,
  171. 'Downloading JSON metadata 2')
  172. error_code = data1.get('error_code')
  173. if error_code:
  174. error = data1.get('error')
  175. if error is not None and '因版权原因无法观看此视频' in error:
  176. raise ExtractorError(
  177. 'Youku said: Sorry, this video is available in China only', expected=True)
  178. else:
  179. msg = 'Youku server reported error %i' % error_code
  180. if error is not None:
  181. msg += ': ' + error
  182. raise ExtractorError(msg)
  183. title = data1['title']
  184. # generate video_urls_dict
  185. video_urls_dict = self.construct_video_urls(data1, data2)
  186. # construct info
  187. entries = [{
  188. 'id': '%s_part%d' % (video_id, i + 1),
  189. 'title': title,
  190. 'formats': [],
  191. # some formats are not available for all parts, we have to detect
  192. # which one has all
  193. } for i in range(max(len(v) for v in data1['segs'].values()))]
  194. for fm in data1['streamtypes']:
  195. video_urls = video_urls_dict[fm]
  196. for video_url, seg, entry in zip(video_urls, data1['segs'][fm], entries):
  197. entry['formats'].append({
  198. 'url': video_url,
  199. 'format_id': self.get_format_name(fm),
  200. 'ext': self.parse_ext_l(fm),
  201. 'filesize': int(seg['size']),
  202. })
  203. return {
  204. '_type': 'multi_video',
  205. 'id': video_id,
  206. 'title': title,
  207. 'entries': entries,
  208. }