You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

164 lines
5.6 KiB

  1. import binascii
  2. import base64
  3. import hashlib
  4. import re
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. compat_ord,
  8. compat_urllib_parse,
  9. ExtractorError,
  10. )
  11. class MyVideoIE(InfoExtractor):
  12. """Information Extractor for myvideo.de."""
  13. _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
  14. IE_NAME = u'myvideo'
  15. # Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git
  16. # Released into the Public Domain by Tristan Fischer on 2013-05-19
  17. # https://github.com/rg3/youtube-dl/pull/842
  18. def __rc4crypt(self,data, key):
  19. x = 0
  20. box = list(range(256))
  21. for i in list(range(256)):
  22. x = (x + box[i] + compat_ord(key[i % len(key)])) % 256
  23. box[i], box[x] = box[x], box[i]
  24. x = 0
  25. y = 0
  26. out = ''
  27. for char in data:
  28. x = (x + 1) % 256
  29. y = (y + box[x]) % 256
  30. box[x], box[y] = box[y], box[x]
  31. out += chr(compat_ord(char) ^ box[(box[x] + box[y]) % 256])
  32. return out
  33. def __md5(self,s):
  34. return hashlib.md5(s).hexdigest().encode()
  35. def _real_extract(self,url):
  36. mobj = re.match(self._VALID_URL, url)
  37. if mobj is None:
  38. raise ExtractorError(u'invalid URL: %s' % url)
  39. video_id = mobj.group(1)
  40. GK = (
  41. b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
  42. b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3'
  43. b'TnpsbA0KTVRkbU1tSTRNdz09'
  44. )
  45. # Get video webpage
  46. webpage_url = 'http://www.myvideo.de/watch/%s' % video_id
  47. webpage = self._download_webpage(webpage_url, video_id)
  48. mobj = re.search('source src=\'(.+?)[.]([^.]+)\'', webpage)
  49. if mobj is not None:
  50. self.report_extraction(video_id)
  51. video_url = mobj.group(1) + '.flv'
  52. video_title = self._html_search_regex('<title>([^<]+)</title>',
  53. webpage, u'title')
  54. video_ext = self._search_regex('[.](.+?)$', video_url, u'extension')
  55. return [{
  56. 'id': video_id,
  57. 'url': video_url,
  58. 'uploader': None,
  59. 'upload_date': None,
  60. 'title': video_title,
  61. 'ext': video_ext,
  62. }]
  63. # try encxml
  64. mobj = re.search('var flashvars={(.+?)}', webpage)
  65. if mobj is None:
  66. raise ExtractorError(u'Unable to extract video')
  67. params = {}
  68. encxml = ''
  69. sec = mobj.group(1)
  70. for (a, b) in re.findall('(.+?):\'(.+?)\',?', sec):
  71. if not a == '_encxml':
  72. params[a] = b
  73. else:
  74. encxml = compat_urllib_parse.unquote(b)
  75. if not params.get('domain'):
  76. params['domain'] = 'www.myvideo.de'
  77. xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
  78. if 'flash_playertype=MTV' in xmldata_url:
  79. self._downloader.report_warning(u'avoiding MTV player')
  80. xmldata_url = (
  81. 'http://www.myvideo.de/dynamic/get_player_video_xml.php'
  82. '?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
  83. ) % video_id
  84. # get enc data
  85. enc_data = self._download_webpage(xmldata_url, video_id).split('=')[1]
  86. enc_data_b = binascii.unhexlify(enc_data)
  87. sk = self.__md5(
  88. base64.b64decode(base64.b64decode(GK)) +
  89. self.__md5(
  90. str(video_id).encode('utf-8')
  91. )
  92. )
  93. dec_data = self.__rc4crypt(enc_data_b, sk)
  94. # extracting infos
  95. self.report_extraction(video_id)
  96. video_url = None
  97. mobj = re.search('connectionurl=\'(.*?)\'', dec_data)
  98. if mobj:
  99. video_url = compat_urllib_parse.unquote(mobj.group(1))
  100. if 'myvideo2flash' in video_url:
  101. self._downloader.report_warning(u'forcing RTMPT ...')
  102. video_url = video_url.replace('rtmpe://', 'rtmpt://')
  103. if not video_url:
  104. # extract non rtmp videos
  105. mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
  106. if mobj is None:
  107. raise ExtractorError(u'unable to extract url')
  108. video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
  109. video_file = self._search_regex('source=\'(.*?)\'', dec_data, u'video file')
  110. video_file = compat_urllib_parse.unquote(video_file)
  111. if not video_file.endswith('f4m'):
  112. ppath, prefix = video_file.split('.')
  113. video_playpath = '%s:%s' % (prefix, ppath)
  114. video_hls_playlist = ''
  115. else:
  116. video_playpath = ''
  117. video_hls_playlist = (
  118. video_file
  119. ).replace('.f4m', '.m3u8')
  120. video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, u'swfobj')
  121. video_swfobj = compat_urllib_parse.unquote(video_swfobj)
  122. video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
  123. webpage, u'title')
  124. return [{
  125. 'id': video_id,
  126. 'url': video_url,
  127. 'tc_url': video_url,
  128. 'uploader': None,
  129. 'upload_date': None,
  130. 'title': video_title,
  131. 'ext': u'flv',
  132. 'play_path': video_playpath,
  133. 'video_file': video_file,
  134. 'video_hls_playlist': video_hls_playlist,
  135. 'player_url': video_swfobj,
  136. }]