You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

268 lines
8.3 KiB

Switch codebase to use sanitized_Request instead of compat_urllib_request.Request [downloader/dash] Use sanitized_Request [downloader/http] Use sanitized_Request [atresplayer] Use sanitized_Request [bambuser] Use sanitized_Request [bliptv] Use sanitized_Request [brightcove] Use sanitized_Request [cbs] Use sanitized_Request [ceskatelevize] Use sanitized_Request [collegerama] Use sanitized_Request [extractor/common] Use sanitized_Request [crunchyroll] Use sanitized_Request [dailymotion] Use sanitized_Request [dcn] Use sanitized_Request [dramafever] Use sanitized_Request [dumpert] Use sanitized_Request [eitb] Use sanitized_Request [escapist] Use sanitized_Request [everyonesmixtape] Use sanitized_Request [extremetube] Use sanitized_Request [facebook] Use sanitized_Request [fc2] Use sanitized_Request [flickr] Use sanitized_Request [4tube] Use sanitized_Request [gdcvault] Use sanitized_Request [extractor/generic] Use sanitized_Request [hearthisat] Use sanitized_Request [hotnewhiphop] Use sanitized_Request [hypem] Use sanitized_Request [iprima] Use sanitized_Request [ivi] Use sanitized_Request [keezmovies] Use sanitized_Request [letv] Use sanitized_Request [lynda] Use sanitized_Request [metacafe] Use sanitized_Request [minhateca] Use sanitized_Request [miomio] Use sanitized_Request [meovideo] Use sanitized_Request [mofosex] Use sanitized_Request [moniker] Use sanitized_Request [mooshare] Use sanitized_Request [movieclips] Use sanitized_Request [mtv] Use sanitized_Request [myvideo] Use sanitized_Request [neteasemusic] Use sanitized_Request [nfb] Use sanitized_Request [niconico] Use sanitized_Request [noco] Use sanitized_Request [nosvideo] Use sanitized_Request [novamov] Use sanitized_Request [nowness] Use sanitized_Request [nuvid] Use sanitized_Request [played] Use sanitized_Request [pluralsight] Use sanitized_Request [pornhub] Use sanitized_Request [pornotube] Use sanitized_Request [primesharetv] Use sanitized_Request [promptfile] Use sanitized_Request [qqmusic] Use 
sanitized_Request [rtve] Use sanitized_Request [safari] Use sanitized_Request [sandia] Use sanitized_Request [shared] Use sanitized_Request [sharesix] Use sanitized_Request [sina] Use sanitized_Request [smotri] Use sanitized_Request [sohu] Use sanitized_Request [spankwire] Use sanitized_Request [sportdeutschland] Use sanitized_Request [streamcloud] Use sanitized_Request [streamcz] Use sanitized_Request [tapely] Use sanitized_Request [tube8] Use sanitized_Request [tubitv] Use sanitized_Request [twitch] Use sanitized_Request [twitter] Use sanitized_Request [udemy] Use sanitized_Request [vbox7] Use sanitized_Request [veoh] Use sanitized_Request [vessel] Use sanitized_Request [vevo] Use sanitized_Request [viddler] Use sanitized_Request [videomega] Use sanitized_Request [viewvster] Use sanitized_Request [viki] Use sanitized_Request [vk] Use sanitized_Request [vodlocker] Use sanitized_Request [voicerepublic] Use sanitized_Request [wistia] Use sanitized_Request [xfileshare] Use sanitized_Request [xtube] Use sanitized_Request [xvideos] Use sanitized_Request [yandexmusic] Use sanitized_Request [youku] Use sanitized_Request [youporn] Use sanitized_Request [youtube] Use sanitized_Request [patreon] Use sanitized_Request [extractor/common] Remove unused import [nfb] PEP 8
9 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import base64
  4. import random
  5. import string
  6. import time
  7. from .common import InfoExtractor
  8. from ..compat import (
  9. compat_urllib_parse,
  10. compat_ord,
  11. )
  12. from ..utils import (
  13. ExtractorError,
  14. sanitized_Request,
  15. )
  16. class YoukuIE(InfoExtractor):
  17. IE_NAME = 'youku'
  18. IE_DESC = '优酷'
  19. _VALID_URL = r'''(?x)
  20. (?:
  21. http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
  22. youku:)
  23. (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
  24. '''
  25. _TESTS = [{
  26. # MD5 is unstable
  27. 'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
  28. 'info_dict': {
  29. 'id': 'XMTc1ODE5Njcy_part1',
  30. 'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
  31. 'ext': 'flv'
  32. }
  33. }, {
  34. 'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
  35. 'only_matching': True,
  36. }, {
  37. 'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html',
  38. 'info_dict': {
  39. 'id': 'XODgxNjg1Mzk2',
  40. 'title': '武媚娘传奇 85',
  41. },
  42. 'playlist_count': 11,
  43. 'skip': 'Available in China only',
  44. }, {
  45. 'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
  46. 'info_dict': {
  47. 'id': 'XMTI1OTczNDM5Mg',
  48. 'title': '花千骨 04',
  49. },
  50. 'playlist_count': 13,
  51. }, {
  52. 'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
  53. 'note': 'Video protected with password',
  54. 'info_dict': {
  55. 'id': 'XNjA1NzA2Njgw',
  56. 'title': '邢義田复旦讲座之想象中的胡人—从“左衽孔子”说起',
  57. },
  58. 'playlist_count': 19,
  59. 'params': {
  60. 'videopassword': '100600',
  61. },
  62. }]
  63. def construct_video_urls(self, data):
  64. # get sid, token
  65. def yk_t(s1, s2):
  66. ls = list(range(256))
  67. t = 0
  68. for i in range(256):
  69. t = (t + ls[i] + compat_ord(s1[i % len(s1)])) % 256
  70. ls[i], ls[t] = ls[t], ls[i]
  71. s = bytearray()
  72. x, y = 0, 0
  73. for i in range(len(s2)):
  74. y = (y + 1) % 256
  75. x = (x + ls[y]) % 256
  76. ls[x], ls[y] = ls[y], ls[x]
  77. s.append(compat_ord(s2[i]) ^ ls[(ls[x] + ls[y]) % 256])
  78. return bytes(s)
  79. sid, token = yk_t(
  80. b'becaf9be', base64.b64decode(data['security']['encrypt_string'].encode('ascii'))
  81. ).decode('ascii').split('_')
  82. # get oip
  83. oip = data['security']['ip']
  84. fileid_dict = {}
  85. for stream in data['stream']:
  86. format = stream.get('stream_type')
  87. fileid = stream['stream_fileid']
  88. fileid_dict[format] = fileid
  89. def get_fileid(format, n):
  90. number = hex(int(str(n), 10))[2:].upper()
  91. if len(number) == 1:
  92. number = '0' + number
  93. streamfileids = fileid_dict[format]
  94. fileid = streamfileids[0:8] + number + streamfileids[10:]
  95. return fileid
  96. # get ep
  97. def generate_ep(format, n):
  98. fileid = get_fileid(format, n)
  99. ep_t = yk_t(
  100. b'bf7e5f01',
  101. ('%s_%s_%s' % (sid, fileid, token)).encode('ascii')
  102. )
  103. ep = base64.b64encode(ep_t).decode('ascii')
  104. return ep
  105. # generate video_urls
  106. video_urls_dict = {}
  107. for stream in data['stream']:
  108. format = stream.get('stream_type')
  109. video_urls = []
  110. for dt in stream['segs']:
  111. n = str(stream['segs'].index(dt))
  112. param = {
  113. 'K': dt['key'],
  114. 'hd': self.get_hd(format),
  115. 'myp': 0,
  116. 'ypp': 0,
  117. 'ctype': 12,
  118. 'ev': 1,
  119. 'token': token,
  120. 'oip': oip,
  121. 'ep': generate_ep(format, n)
  122. }
  123. video_url = \
  124. 'http://k.youku.com/player/getFlvPath/' + \
  125. 'sid/' + sid + \
  126. '_00' + \
  127. '/st/' + self.parse_ext_l(format) + \
  128. '/fileid/' + get_fileid(format, n) + '?' + \
  129. compat_urllib_parse.urlencode(param)
  130. video_urls.append(video_url)
  131. video_urls_dict[format] = video_urls
  132. return video_urls_dict
  133. @staticmethod
  134. def get_ysuid():
  135. return '%d%s' % (int(time.time()), ''.join([
  136. random.choice(string.ascii_letters) for i in range(3)]))
  137. def get_hd(self, fm):
  138. hd_id_dict = {
  139. '3gp': '0',
  140. '3gphd': '1',
  141. 'flv': '0',
  142. 'flvhd': '0',
  143. 'mp4': '1',
  144. 'mp4hd': '1',
  145. 'mp4hd2': '1',
  146. 'mp4hd3': '1',
  147. 'hd2': '2',
  148. 'hd3': '3',
  149. }
  150. return hd_id_dict[fm]
  151. def parse_ext_l(self, fm):
  152. ext_dict = {
  153. '3gp': 'flv',
  154. '3gphd': 'mp4',
  155. 'flv': 'flv',
  156. 'flvhd': 'flv',
  157. 'mp4': 'mp4',
  158. 'mp4hd': 'mp4',
  159. 'mp4hd2': 'flv',
  160. 'mp4hd3': 'flv',
  161. 'hd2': 'flv',
  162. 'hd3': 'flv',
  163. }
  164. return ext_dict[fm]
  165. def get_format_name(self, fm):
  166. _dict = {
  167. '3gp': 'h6',
  168. '3gphd': 'h5',
  169. 'flv': 'h4',
  170. 'flvhd': 'h4',
  171. 'mp4': 'h3',
  172. 'mp4hd': 'h3',
  173. 'mp4hd2': 'h4',
  174. 'mp4hd3': 'h4',
  175. 'hd2': 'h2',
  176. 'hd3': 'h1',
  177. }
  178. return _dict[fm]
  179. def _real_extract(self, url):
  180. video_id = self._match_id(url)
  181. self._set_cookie('youku.com', '__ysuid', self.get_ysuid())
  182. def retrieve_data(req_url, note):
  183. headers = {
  184. 'Referer': req_url,
  185. }
  186. self._set_cookie('youku.com', 'xreferrer', 'http://www.youku.com')
  187. req = sanitized_Request(req_url, headers=headers)
  188. cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
  189. if cn_verification_proxy:
  190. req.add_header('Ytdl-request-proxy', cn_verification_proxy)
  191. raw_data = self._download_json(req, video_id, note=note)
  192. return raw_data['data']
  193. video_password = self._downloader.params.get('videopassword', None)
  194. # request basic data
  195. basic_data_url = "http://play.youku.com/play/get.json?vid=%s&ct=12" % video_id
  196. if video_password:
  197. basic_data_url += '&pwd=%s' % video_password
  198. data = retrieve_data(basic_data_url, 'Downloading JSON metadata')
  199. error = data.get('error')
  200. if error:
  201. error_note = error.get('note')
  202. if error_note is not None and '因版权原因无法观看此视频' in error_note:
  203. raise ExtractorError(
  204. 'Youku said: Sorry, this video is available in China only', expected=True)
  205. else:
  206. msg = 'Youku server reported error %i' % error.get('code')
  207. if error_note is not None:
  208. msg += ': ' + error_note
  209. raise ExtractorError(msg)
  210. # get video title
  211. title = data['video']['title']
  212. # generate video_urls_dict
  213. video_urls_dict = self.construct_video_urls(data)
  214. # construct info
  215. entries = [{
  216. 'id': '%s_part%d' % (video_id, i + 1),
  217. 'title': title,
  218. 'formats': [],
  219. # some formats are not available for all parts, we have to detect
  220. # which one has all
  221. } for i in range(max(len(v.get('segs')) for v in data['stream']))]
  222. for stream in data['stream']:
  223. fm = stream.get('stream_type')
  224. video_urls = video_urls_dict[fm]
  225. for video_url, seg, entry in zip(video_urls, stream['segs'], entries):
  226. entry['formats'].append({
  227. 'url': video_url,
  228. 'format_id': self.get_format_name(fm),
  229. 'ext': self.parse_ext_l(fm),
  230. 'filesize': int(seg['size']),
  231. })
  232. return {
  233. '_type': 'multi_video',
  234. 'id': video_id,
  235. 'title': title,
  236. 'entries': entries,
  237. }