You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

172 lines
5.8 KiB

  1. import binascii
  2. import base64
  3. import hashlib
  4. import re
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. compat_ord,
  8. compat_urllib_parse,
  9. ExtractorError,
  10. )
  11. class MyVideoIE(InfoExtractor):
  12. """Information Extractor for myvideo.de."""
  13. _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
  14. IE_NAME = u'myvideo'
  15. _TEST = {
  16. u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
  17. u'file': u'8229274.flv',
  18. u'md5': u'2d2753e8130479ba2cb7e0a37002053e',
  19. u'info_dict': {
  20. u"title": u"bowling-fail-or-win"
  21. }
  22. }
  23. # Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git
  24. # Released into the Public Domain by Tristan Fischer on 2013-05-19
  25. # https://github.com/rg3/youtube-dl/pull/842
  26. def __rc4crypt(self,data, key):
  27. x = 0
  28. box = list(range(256))
  29. for i in list(range(256)):
  30. x = (x + box[i] + compat_ord(key[i % len(key)])) % 256
  31. box[i], box[x] = box[x], box[i]
  32. x = 0
  33. y = 0
  34. out = ''
  35. for char in data:
  36. x = (x + 1) % 256
  37. y = (y + box[x]) % 256
  38. box[x], box[y] = box[y], box[x]
  39. out += chr(compat_ord(char) ^ box[(box[x] + box[y]) % 256])
  40. return out
  41. def __md5(self,s):
  42. return hashlib.md5(s).hexdigest().encode()
  43. def _real_extract(self,url):
  44. mobj = re.match(self._VALID_URL, url)
  45. if mobj is None:
  46. raise ExtractorError(u'invalid URL: %s' % url)
  47. video_id = mobj.group(1)
  48. GK = (
  49. b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
  50. b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3'
  51. b'TnpsbA0KTVRkbU1tSTRNdz09'
  52. )
  53. # Get video webpage
  54. webpage_url = 'http://www.myvideo.de/watch/%s' % video_id
  55. webpage = self._download_webpage(webpage_url, video_id)
  56. mobj = re.search('source src=\'(.+?)[.]([^.]+)\'', webpage)
  57. if mobj is not None:
  58. self.report_extraction(video_id)
  59. video_url = mobj.group(1) + '.flv'
  60. video_title = self._html_search_regex('<title>([^<]+)</title>',
  61. webpage, u'title')
  62. video_ext = self._search_regex('[.](.+?)$', video_url, u'extension')
  63. return [{
  64. 'id': video_id,
  65. 'url': video_url,
  66. 'uploader': None,
  67. 'upload_date': None,
  68. 'title': video_title,
  69. 'ext': video_ext,
  70. }]
  71. # try encxml
  72. mobj = re.search('var flashvars={(.+?)}', webpage)
  73. if mobj is None:
  74. raise ExtractorError(u'Unable to extract video')
  75. params = {}
  76. encxml = ''
  77. sec = mobj.group(1)
  78. for (a, b) in re.findall('(.+?):\'(.+?)\',?', sec):
  79. if not a == '_encxml':
  80. params[a] = b
  81. else:
  82. encxml = compat_urllib_parse.unquote(b)
  83. if not params.get('domain'):
  84. params['domain'] = 'www.myvideo.de'
  85. xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
  86. if 'flash_playertype=MTV' in xmldata_url:
  87. self._downloader.report_warning(u'avoiding MTV player')
  88. xmldata_url = (
  89. 'http://www.myvideo.de/dynamic/get_player_video_xml.php'
  90. '?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
  91. ) % video_id
  92. # get enc data
  93. enc_data = self._download_webpage(xmldata_url, video_id).split('=')[1]
  94. enc_data_b = binascii.unhexlify(enc_data)
  95. sk = self.__md5(
  96. base64.b64decode(base64.b64decode(GK)) +
  97. self.__md5(
  98. str(video_id).encode('utf-8')
  99. )
  100. )
  101. dec_data = self.__rc4crypt(enc_data_b, sk)
  102. # extracting infos
  103. self.report_extraction(video_id)
  104. video_url = None
  105. mobj = re.search('connectionurl=\'(.*?)\'', dec_data)
  106. if mobj:
  107. video_url = compat_urllib_parse.unquote(mobj.group(1))
  108. if 'myvideo2flash' in video_url:
  109. self._downloader.report_warning(u'forcing RTMPT ...')
  110. video_url = video_url.replace('rtmpe://', 'rtmpt://')
  111. if not video_url:
  112. # extract non rtmp videos
  113. mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
  114. if mobj is None:
  115. raise ExtractorError(u'unable to extract url')
  116. video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
  117. video_file = self._search_regex('source=\'(.*?)\'', dec_data, u'video file')
  118. video_file = compat_urllib_parse.unquote(video_file)
  119. if not video_file.endswith('f4m'):
  120. ppath, prefix = video_file.split('.')
  121. video_playpath = '%s:%s' % (prefix, ppath)
  122. video_hls_playlist = ''
  123. else:
  124. video_playpath = ''
  125. video_hls_playlist = (
  126. video_file
  127. ).replace('.f4m', '.m3u8')
  128. video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, u'swfobj')
  129. video_swfobj = compat_urllib_parse.unquote(video_swfobj)
  130. video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
  131. webpage, u'title')
  132. return [{
  133. 'id': video_id,
  134. 'url': video_url,
  135. 'tc_url': video_url,
  136. 'uploader': None,
  137. 'upload_date': None,
  138. 'title': video_title,
  139. 'ext': u'flv',
  140. 'play_path': video_playpath,
  141. 'video_file': video_file,
  142. 'video_hls_playlist': video_hls_playlist,
  143. 'player_url': video_swfobj,
  144. }]