You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

199 lines
6.3 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. import base64
  5. from .common import InfoExtractor
  6. from ..utils import ExtractorError
  7. class YoukuIE(InfoExtractor):
  8. IE_NAME = 'youku'
  9. _VALID_URL = r'''(?x)
  10. (?:
  11. http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
  12. youku:)
  13. (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
  14. '''
  15. _TEST = {
  16. 'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
  17. 'md5': '5f3af4192eabacc4501508d54a8cabd7',
  18. 'info_dict': {
  19. 'id': 'XMTc1ODE5Njcy',
  20. 'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
  21. 'ext': 'flv'
  22. }
  23. }
  24. def construct_video_urls(self, data1, data2):
  25. # get sid, token
  26. def yk_t(s1, s2):
  27. ls = list(range(256))
  28. t = 0
  29. for i in range(256):
  30. t = (t + ls[i] + ord(s1[i%len(s1)])) % 256
  31. ls[i], ls[t] = ls[t], ls[i]
  32. s, x, y = '', 0, 0
  33. for i in range(len(s2)):
  34. y = (y + 1) % 256
  35. x = (x + ls[y]) % 256
  36. ls[x], ls[y] = ls[y], ls[x]
  37. s += chr((s2[i] ^ ls[(ls[x]+ls[y]) % 256]))
  38. return s
  39. sid, token = yk_t(
  40. 'becaf9be', base64.b64decode(bytes(data2['ep'], 'ascii'))
  41. ).split('_')
  42. # get oip
  43. oip = data2['ip']
  44. # get fileid
  45. string_ls = list(
  46. 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890')
  47. shuffled_string_ls = []
  48. seed = data1['seed']
  49. N = len(string_ls)
  50. for ii in range(N):
  51. seed = (seed * 0xd3 + 0x754f) % 0x10000
  52. idx = seed * len(string_ls) // 0x10000
  53. shuffled_string_ls.append(string_ls[idx])
  54. del string_ls[idx]
  55. fileid_dict = {}
  56. for format in data1['streamtypes']:
  57. streamfileid = [
  58. int(i) for i in data1['streamfileids'][format].strip('*').split('*')]
  59. fileid = ''.join(
  60. [shuffled_string_ls[i] for i in streamfileid])
  61. fileid_dict[format] = fileid[:8] + '%s' + fileid[10:]
  62. def get_fileid(format, n):
  63. fileid = fileid_dict[format] % hex(int(n))[2:].upper().zfill(2)
  64. return fileid
  65. # get ep
  66. def generate_ep(format, n):
  67. fileid = get_fileid(format, n)
  68. ep_t = yk_t(
  69. 'bf7e5f01',
  70. bytes('%s_%s_%s' % (sid, fileid, token), 'ascii'))
  71. ep = base64.b64encode(bytes(ep_t, 'latin')).decode()
  72. ep = ep.replace('+', '%2B')
  73. ep = ep.replace('/', '%2F')
  74. ep = ep.replace('=', '%2D')
  75. return ep
  76. # generate video_urls
  77. video_urls_dict = {}
  78. for format in data1['streamtypes']:
  79. video_urls = []
  80. for dt in data1['segs'][format]:
  81. n = str(int(dt['no']))
  82. video_url = \
  83. 'http://k.youku.com/player/getFlvPath/' + \
  84. 'sid/' + sid + \
  85. '_' + str(int(n)+1).zfill(2) + \
  86. '/st/' + self.parse_ext_l(format) + \
  87. '/fileid/' + get_fileid(format, n) + '?' + \
  88. 'K=' + str(dt['k']) + \
  89. '&hd=' + self.get_hd(format) + \
  90. '&myp=0' + \
  91. '&ts=' + str(dt['seconds']) + \
  92. '&ypp=0&ctype=12&ev=1' + \
  93. '&token=' + str(token) + \
  94. '&oip=' + str(oip) + \
  95. '&ep=' + generate_ep(format, n)
  96. video_urls.append(video_url)
  97. video_urls_dict[format] = video_urls
  98. return video_urls_dict
  99. def get_hd(self, fm):
  100. hd_id_dict = {
  101. 'flv': '0',
  102. 'mp4': '1',
  103. 'hd2': '2',
  104. 'hd3': '3',
  105. '3gp': '0',
  106. '3gphd': '1'
  107. }
  108. return hd_id_dict[fm]
  109. def parse_ext_l(self, fm):
  110. ext_dict = {
  111. 'flv': 'flv',
  112. 'mp4': 'mp4',
  113. 'hd2': 'flv',
  114. 'hd3': 'flv',
  115. '3gp': 'flv',
  116. '3gphd': 'mp4',
  117. }
  118. return ext_dict[fm]
  119. def _real_extract(self, url):
  120. mobj = re.match(self._VALID_URL, url)
  121. video_id = mobj.group('id')
  122. # request basic data
  123. data1_url = 'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id
  124. data2_url = 'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id
  125. raw_data1 = self._download_json(data1_url, video_id)
  126. raw_data2 = self._download_json(data2_url, video_id)
  127. data1 = raw_data1['data'][0]
  128. data2 = raw_data2['data'][0]
  129. error_code = data1.get('error_code')
  130. if error_code:
  131. # -8 means blocked outside China.
  132. # Chinese and English, separated by newline.
  133. error = data1.get('error')
  134. raise ExtractorError(
  135. error or 'Server reported error %i' %
  136. error_code,
  137. expected=True)
  138. title = data1['title']
  139. # generate video_urls_dict
  140. video_urls_dict = self.construct_video_urls(data1, data2)
  141. # construct info
  142. entries = []
  143. for fm in data1['streamtypes']:
  144. #formats = []
  145. video_urls = video_urls_dict[fm]
  146. for i in range(len(video_urls)):
  147. if len(entries) < i+1:
  148. entries.append({'formats': []})
  149. entries[i]['formats'].append(
  150. {
  151. 'url': video_urls[i],
  152. 'format_id': fm,
  153. 'ext': self.parse_ext_l(fm),
  154. 'filesize': int(data1['segs'][fm][i]['size'])
  155. }
  156. )
  157. for i in range(len(entries)):
  158. entries[i].update(
  159. {
  160. 'id': '_part%d' % (i+1),
  161. 'title': title,
  162. }
  163. )
  164. if len(entries) > 1:
  165. info = {
  166. '_type': 'multi_video',
  167. 'id': video_id,
  168. 'title': title,
  169. 'entries': entries,
  170. }
  171. else:
  172. info = entries[0]
  173. info['id'] = video_id
  174. return info