You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

159 lines
5.5 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals, division
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import (
  6. compat_chr,
  7. compat_ord,
  8. )
  9. from ..utils import (
  10. determine_ext,
  11. ExtractorError,
  12. )
  13. from ..jsinterp import (
  14. JSInterpreter,
  15. _NAME_RE
  16. )
  17. class OpenloadIE(InfoExtractor):
  18. _VALID_URL = r'https?://openload\.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
  19. _TESTS = [{
  20. 'url': 'https://openload.co/f/kUEfGclsU9o',
  21. 'md5': 'bf1c059b004ebc7a256f89408e65c36e',
  22. 'info_dict': {
  23. 'id': 'kUEfGclsU9o',
  24. 'ext': 'mp4',
  25. 'title': 'skyrim_no-audio_1080.mp4',
  26. 'thumbnail': 're:^https?://.*\.jpg$',
  27. },
  28. }, {
  29. 'url': 'https://openload.co/embed/rjC09fkPLYs',
  30. 'info_dict': {
  31. 'id': 'rjC09fkPLYs',
  32. 'ext': 'mp4',
  33. 'title': 'movie.mp4',
  34. 'thumbnail': 're:^https?://.*\.jpg$',
  35. 'subtitles': {
  36. 'en': [{
  37. 'ext': 'vtt',
  38. }],
  39. },
  40. },
  41. 'params': {
  42. 'skip_download': True, # test subtitles only
  43. },
  44. }, {
  45. 'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4',
  46. 'only_matching': True,
  47. }, {
  48. 'url': 'https://openload.io/f/ZAn6oz-VZGE/',
  49. 'only_matching': True,
  50. }, {
  51. 'url': 'https://openload.co/f/_-ztPaZtMhM/',
  52. 'only_matching': True,
  53. }, {
  54. # unavailable via https://openload.co/f/Sxz5sADo82g/, different layout
  55. # for title and ext
  56. 'url': 'https://openload.co/embed/Sxz5sADo82g/',
  57. 'only_matching': True,
  58. }]
  59. def openload_decode(self, txt):
  60. symbol_dict = {
  61. '(゚Д゚) [゚Θ゚]': '_',
  62. '(゚Д゚) [゚ω゚ノ]': 'a',
  63. '(゚Д゚) [゚Θ゚ノ]': 'b',
  64. '(゚Д゚) [\'c\']': 'c',
  65. '(゚Д゚) [゚ー゚ノ]': 'd',
  66. '(゚Д゚) [゚Д゚ノ]': 'e',
  67. '(゚Д゚) [1]': 'f',
  68. '(゚Д゚) [\'o\']': 'o',
  69. '(o゚ー゚o)': 'u',
  70. '(゚Д゚) [\'c\']': 'c',
  71. '((゚ー゚) + (o^_^o))': '7',
  72. '((o^_^o) +(o^_^o) +(c^_^o))': '6',
  73. '((゚ー゚) + (゚Θ゚))': '5',
  74. '(-~3)': '4',
  75. '(-~-~1)': '3',
  76. '(-~1)': '2',
  77. '(-~0)': '1',
  78. '((c^_^o)-(c^_^o))': '0',
  79. }
  80. delim = '(゚Д゚)[゚ε゚]+'
  81. end_token = '(゚Д゚)[゚o゚]'
  82. symbols = '|'.join(map(re.escape, symbol_dict.keys()))
  83. txt = re.sub('(%s)\+\s?' % symbols, lambda m: symbol_dict[m.group(1)], txt)
  84. ret = ''
  85. for aacode in re.findall(r'{0}\+\s?{1}(.*?){0}'.format(re.escape(end_token), re.escape(delim)), txt):
  86. for aachar in aacode.split(delim):
  87. if aachar.isdigit():
  88. ret += compat_chr(int(aachar, 8))
  89. else:
  90. m = re.match(r'^u([\da-f]{4})$', aachar)
  91. if m:
  92. ret += compat_chr(int(m.group(1), 16))
  93. else:
  94. self.report_warning("Cannot decode: %s" % aachar)
  95. return ret
  96. def _real_extract(self, url):
  97. video_id = self._match_id(url)
  98. webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id)
  99. if 'File not found' in webpage or 'deleted by the owner' in webpage:
  100. raise ExtractorError('File not found', expected=True)
  101. # The following decryption algorithm is written by @yokrysty and
  102. # declared to be freely used in youtube-dl
  103. # See https://github.com/rg3/youtube-dl/issues/10408
  104. enc_data = self._html_search_regex(
  105. r'<span[^>]*>([^<]+)</span>\s*<span[^>]*>[^<]+</span>\s*<span[^>]+id="streamurl"',
  106. webpage, 'encrypted data')
  107. enc_code = self._html_search_regex(r'<script[^>]+>(゚ω゚[^<]+)</script>',
  108. webpage, 'encrypted code')
  109. js_code = self.openload_decode(enc_code)
  110. jsi = JSInterpreter(js_code)
  111. m_offset_fun = self._search_regex(r'slice\(0\s*-\s*(%s)\(\)' % _NAME_RE, js_code, 'javascript offset function')
  112. m_diff_fun = self._search_regex(r'charCodeAt\(0\)\s*\+\s*(%s)\(\)' % _NAME_RE, js_code, 'javascript diff function')
  113. offset = jsi.call_function(m_offset_fun)
  114. diff = jsi.call_function(m_diff_fun)
  115. video_url_chars = []
  116. for idx, c in enumerate(enc_data):
  117. j = compat_ord(c)
  118. if j >= 33 and j <= 126:
  119. j = ((j + 14) % 94) + 33
  120. if idx == len(enc_data) - offset:
  121. j += diff
  122. video_url_chars += compat_chr(j)
  123. video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars)
  124. title = self._og_search_title(webpage, default=None) or self._search_regex(
  125. r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
  126. 'title', default=None) or self._html_search_meta(
  127. 'description', webpage, 'title', fatal=True)
  128. entries = self._parse_html5_media_entries(url, webpage, video_id)
  129. subtitles = entries[0]['subtitles'] if entries else None
  130. info_dict = {
  131. 'id': video_id,
  132. 'title': title,
  133. 'thumbnail': self._og_search_thumbnail(webpage, default=None),
  134. 'url': video_url,
  135. # Seems all videos have extensions in their titles
  136. 'ext': determine_ext(title),
  137. 'subtitles': subtitles,
  138. }
  139. return info_dict