You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

190 lines
6.7 KiB

  1. import binascii
  2. import base64
  3. import hashlib
  4. import re
  5. import json
  6. from .common import InfoExtractor
  7. from ..utils import (
  8. compat_ord,
  9. compat_urllib_parse,
  10. compat_urllib_request,
  11. ExtractorError,
  12. )
  13. class MyVideoIE(InfoExtractor):
  14. """Information Extractor for myvideo.de."""
  15. _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/([0-9]+)/([^?/]+).*'
  16. IE_NAME = u'myvideo'
  17. _TEST = {
  18. u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
  19. u'file': u'8229274.flv',
  20. u'md5': u'2d2753e8130479ba2cb7e0a37002053e',
  21. u'info_dict': {
  22. u"title": u"bowling-fail-or-win"
  23. }
  24. }
  25. # Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git
  26. # Released into the Public Domain by Tristan Fischer on 2013-05-19
  27. # https://github.com/rg3/youtube-dl/pull/842
  28. def __rc4crypt(self,data, key):
  29. x = 0
  30. box = list(range(256))
  31. for i in list(range(256)):
  32. x = (x + box[i] + compat_ord(key[i % len(key)])) % 256
  33. box[i], box[x] = box[x], box[i]
  34. x = 0
  35. y = 0
  36. out = ''
  37. for char in data:
  38. x = (x + 1) % 256
  39. y = (y + box[x]) % 256
  40. box[x], box[y] = box[y], box[x]
  41. out += chr(compat_ord(char) ^ box[(box[x] + box[y]) % 256])
  42. return out
  43. def __md5(self,s):
  44. return hashlib.md5(s).hexdigest().encode()
  45. def _real_extract(self,url):
  46. mobj = re.match(self._VALID_URL, url)
  47. if mobj is None:
  48. raise ExtractorError(u'invalid URL: %s' % url)
  49. video_id = mobj.group(1)
  50. GK = (
  51. b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
  52. b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3'
  53. b'TnpsbA0KTVRkbU1tSTRNdz09'
  54. )
  55. # Get video webpage
  56. webpage_url = 'http://www.myvideo.de/watch/%s' % video_id
  57. webpage = self._download_webpage(webpage_url, video_id)
  58. mobj = re.search('source src=\'(.+?)[.]([^.]+)\'', webpage)
  59. if mobj is not None:
  60. self.report_extraction(video_id)
  61. video_url = mobj.group(1) + '.flv'
  62. video_title = self._html_search_regex('<title>([^<]+)</title>',
  63. webpage, u'title')
  64. video_ext = self._search_regex('[.](.+?)$', video_url, u'extension')
  65. return [{
  66. 'id': video_id,
  67. 'url': video_url,
  68. 'uploader': None,
  69. 'upload_date': None,
  70. 'title': video_title,
  71. 'ext': video_ext,
  72. }]
  73. mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
  74. if mobj is not None:
  75. request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
  76. response = self._download_webpage(request, video_id,
  77. u'Downloading video info')
  78. info = json.loads(base64.b64decode(response).decode('utf-8'))
  79. return {'id': video_id,
  80. 'title': info['title'],
  81. 'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
  82. 'play_path': info['filename'],
  83. 'ext': 'flv',
  84. 'thumbnail': info['thumbnail'][0]['url'],
  85. }
  86. # try encxml
  87. mobj = re.search('var flashvars={(.+?)}', webpage)
  88. if mobj is None:
  89. raise ExtractorError(u'Unable to extract video')
  90. params = {}
  91. encxml = ''
  92. sec = mobj.group(1)
  93. for (a, b) in re.findall('(.+?):\'(.+?)\',?', sec):
  94. if not a == '_encxml':
  95. params[a] = b
  96. else:
  97. encxml = compat_urllib_parse.unquote(b)
  98. if not params.get('domain'):
  99. params['domain'] = 'www.myvideo.de'
  100. xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
  101. if 'flash_playertype=MTV' in xmldata_url:
  102. self._downloader.report_warning(u'avoiding MTV player')
  103. xmldata_url = (
  104. 'http://www.myvideo.de/dynamic/get_player_video_xml.php'
  105. '?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
  106. ) % video_id
  107. # get enc data
  108. enc_data = self._download_webpage(xmldata_url, video_id).split('=')[1]
  109. enc_data_b = binascii.unhexlify(enc_data)
  110. sk = self.__md5(
  111. base64.b64decode(base64.b64decode(GK)) +
  112. self.__md5(
  113. str(video_id).encode('utf-8')
  114. )
  115. )
  116. dec_data = self.__rc4crypt(enc_data_b, sk)
  117. # extracting infos
  118. self.report_extraction(video_id)
  119. video_url = None
  120. mobj = re.search('connectionurl=\'(.*?)\'', dec_data)
  121. if mobj:
  122. video_url = compat_urllib_parse.unquote(mobj.group(1))
  123. if 'myvideo2flash' in video_url:
  124. self.report_warning(
  125. u'Rewriting URL to use unencrypted rtmp:// ...',
  126. video_id)
  127. video_url = video_url.replace('rtmpe://', 'rtmp://')
  128. if not video_url:
  129. # extract non rtmp videos
  130. mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
  131. if mobj is None:
  132. raise ExtractorError(u'unable to extract url')
  133. video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
  134. video_file = self._search_regex('source=\'(.*?)\'', dec_data, u'video file')
  135. video_file = compat_urllib_parse.unquote(video_file)
  136. if not video_file.endswith('f4m'):
  137. ppath, prefix = video_file.split('.')
  138. video_playpath = '%s:%s' % (prefix, ppath)
  139. video_hls_playlist = ''
  140. else:
  141. video_playpath = ''
  142. video_hls_playlist = (
  143. video_file
  144. ).replace('.f4m', '.m3u8')
  145. video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, u'swfobj')
  146. video_swfobj = compat_urllib_parse.unquote(video_swfobj)
  147. video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
  148. webpage, u'title')
  149. return [{
  150. 'id': video_id,
  151. 'url': video_url,
  152. 'tc_url': video_url,
  153. 'uploader': None,
  154. 'upload_date': None,
  155. 'title': video_title,
  156. 'ext': u'flv',
  157. 'play_path': video_playpath,
  158. 'video_file': video_file,
  159. 'video_hls_playlist': video_hls_playlist,
  160. 'player_url': video_swfobj,
  161. }]