You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

104 lines
3.3 KiB

  1. import json
  2. import math
  3. import random
  4. import re
  5. import time
  6. from .common import InfoExtractor
  7. from ..utils import (
  8. ExtractorError,
  9. )
  10. class YoukuIE(InfoExtractor):
  11. _VALID_URL = r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html'
  12. def _gen_sid(self):
  13. nowTime = int(time.time() * 1000)
  14. random1 = random.randint(1000,1998)
  15. random2 = random.randint(1000,9999)
  16. return "%d%d%d" %(nowTime,random1,random2)
  17. def _get_file_ID_mix_string(self, seed):
  18. mixed = []
  19. source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890")
  20. seed = float(seed)
  21. for i in range(len(source)):
  22. seed = (seed * 211 + 30031 ) % 65536
  23. index = math.floor(seed / 65536 * len(source) )
  24. mixed.append(source[int(index)])
  25. source.remove(source[int(index)])
  26. #return ''.join(mixed)
  27. return mixed
  28. def _get_file_id(self, fileId, seed):
  29. mixed = self._get_file_ID_mix_string(seed)
  30. ids = fileId.split('*')
  31. realId = []
  32. for ch in ids:
  33. if ch:
  34. realId.append(mixed[int(ch)])
  35. return ''.join(realId)
  36. def _real_extract(self, url):
  37. mobj = re.match(self._VALID_URL, url)
  38. if mobj is None:
  39. raise ExtractorError(u'Invalid URL: %s' % url)
  40. video_id = mobj.group('ID')
  41. info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id
  42. jsondata = self._download_webpage(info_url, video_id)
  43. self.report_extraction(video_id)
  44. try:
  45. config = json.loads(jsondata)
  46. video_title = config['data'][0]['title']
  47. seed = config['data'][0]['seed']
  48. format = self._downloader.params.get('format', None)
  49. supported_format = list(config['data'][0]['streamfileids'].keys())
  50. if format is None or format == 'best':
  51. if 'hd2' in supported_format:
  52. format = 'hd2'
  53. else:
  54. format = 'flv'
  55. ext = u'flv'
  56. elif format == 'worst':
  57. format = 'mp4'
  58. ext = u'mp4'
  59. else:
  60. format = 'flv'
  61. ext = u'flv'
  62. fileid = config['data'][0]['streamfileids'][format]
  63. keys = [s['k'] for s in config['data'][0]['segs'][format]]
  64. except (UnicodeDecodeError, ValueError, KeyError):
  65. raise ExtractorError(u'Unable to extract info section')
  66. files_info=[]
  67. sid = self._gen_sid()
  68. fileid = self._get_file_id(fileid, seed)
  69. #column 8,9 of fileid represent the segment number
  70. #fileid[7:9] should be changed
  71. for index, key in enumerate(keys):
  72. temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:])
  73. download_url = 'http://f.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key)
  74. info = {
  75. 'id': '%s_part%02d' % (video_id, index),
  76. 'url': download_url,
  77. 'uploader': None,
  78. 'upload_date': None,
  79. 'title': video_title,
  80. 'ext': ext,
  81. }
  82. files_info.append(info)
  83. return files_info