You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

437 lines
15 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import json
  4. import os
  5. import random
  6. import re
  7. import subprocess
  8. import tempfile
  9. from .common import InfoExtractor
  10. from ..compat import (
  11. compat_urlparse,
  12. compat_kwargs,
  13. )
  14. from ..utils import (
  15. check_executable,
  16. determine_ext,
  17. encodeArgument,
  18. ExtractorError,
  19. get_element_by_id,
  20. get_exe_version,
  21. is_outdated_version,
  22. std_headers,
  23. )
  def cookie_to_dict(cookie):
      """Convert a cookie object into a plain dict.

      ``name`` and ``value`` are always copied. ``port``, ``domain`` and
      ``path`` are copied only when the matching ``<attr>_specified`` flag is
      truthy, and ``expires``, ``secure`` and ``discard`` only when they are
      not None. ``httponly`` is set to True when the cookie reports a
      non-standard HttpOnly attribute under any of the spellings tried below.
      """
      result = {'name': cookie.name, 'value': cookie.value}

      # Attributes guarded by an explicit "<attr>_specified" flag.
      for attr in ('port', 'domain', 'path'):
          if getattr(cookie, attr + '_specified'):
              result[attr] = getattr(cookie, attr)

      # Attributes copied whenever they hold something other than None.
      for attr in ('expires', 'secure', 'discard'):
          if getattr(cookie, attr) is not None:
              result[attr] = getattr(cookie, attr)

      try:
          http_only = any(
              cookie.has_nonstandard_attr(spelling)
              for spelling in ('httpOnly', 'httponly', 'HttpOnly'))
      except TypeError:
          # Best effort: leave the flag out when the lookup raises TypeError.
          http_only = False
      if http_only:
          result['httponly'] = True

      return result
  def cookie_jar_to_list(cookie_jar):
      """Return a list holding the ``cookie_to_dict`` result for every cookie in *cookie_jar*."""
      converted = []
      for entry in cookie_jar:
          converted.append(cookie_to_dict(entry))
      return converted
  class PhantomJSwrapper(object):
      """Run JavaScript against a web page through the PhantomJS executable.

      This class is experimental.

      A script rendered from ``_TEMPLATE`` is executed by PhantomJS. The page
      HTML and the cookies are exchanged with that script through temporary
      files that are created in ``__init__`` and removed in ``__del__``.
      """

      # PhantomJS script, rendered with ``str.format``: literal JS braces are
      # doubled, single-brace names are substitution fields.
      _TEMPLATE = r'''
          phantom.onError = function(msg, trace) {{
            var msgStack = ['PHANTOM ERROR: ' + msg];
            if(trace && trace.length) {{
              msgStack.push('TRACE:');
              trace.forEach(function(t) {{
                msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line
                  + (t.function ? ' (in function ' + t.function +')' : ''));
              }});
            }}
            console.error(msgStack.join('\n'));
            phantom.exit(1);
          }};
          var page = require('webpage').create();
          var fs = require('fs');
          var read = {{ mode: 'r', charset: 'utf-8' }};
          var write = {{ mode: 'w', charset: 'utf-8' }};
          JSON.parse(fs.read("{cookies}", read)).forEach(function(x) {{
            phantom.addCookie(x);
          }});
          page.settings.resourceTimeout = {timeout};
          page.settings.userAgent = "{ua}";
          page.onLoadStarted = function() {{
            page.evaluate(function() {{
              delete window._phantom;
              delete window.callPhantom;
            }});
          }};
          var saveAndExit = function() {{
            fs.write("{html}", page.content, write);
            fs.write("{cookies}", JSON.stringify(phantom.cookies), write);
            phantom.exit();
          }};
          page.onLoadFinished = function(status) {{
            if(page.url === "") {{
              page.setContent(fs.read("{html}", read), "{url}");
            }}
            else {{
              {jscode}
            }}
          }};
          page.open("");
      '''

      # One temporary file is created per name; the names double as
      # ``_TEMPLATE`` fields.
      _TMP_FILE_NAMES = ['script', 'html', 'cookies']

      @staticmethod
      def _version():
          """Return what ``get_exe_version`` reports for the ``phantomjs`` executable."""
          return get_exe_version('phantomjs', version_re=r'([0-9.]+)')

      @staticmethod
      def _js_escape(text):
          """Escape backslashes and double quotes for a double-quoted JS string literal."""
          return text.replace('\\', '\\\\').replace('"', '\\"')

      def __init__(self, extractor, required_version=None, timeout=10000):
          """Locate PhantomJS and create the temporary exchange files.

          Params:
              extractor: extractor used for downloading, cookies and messages
              required_version: optional, a warning is reported when the
                                installed PhantomJS is older than this
              timeout: value substituted for ``page.settings.resourceTimeout``

          Raises ExtractorError when the ``phantomjs`` executable is not found.
          """
          # Assigned first so that __del__ can run even if the lookup below raises.
          self._TMP_FILES = {}

          self.exe = check_executable('phantomjs', ['-v'])
          if not self.exe:
              raise ExtractorError('PhantomJS executable not found in PATH, '
                                   'download it from http://phantomjs.org',
                                   expected=True)

          self.extractor = extractor

          if required_version:
              version = self._version()
              if is_outdated_version(version, required_version):
                  self.extractor._downloader.report_warning(
                      'Your copy of PhantomJS is outdated, update it to version '
                      '%s or newer if you encounter any errors.' % required_version)

          self.options = {
              'timeout': timeout,
          }
          for name in self._TMP_FILE_NAMES:
              # delete=False: the file is only reserved here; it is reopened by
              # name later on and removed explicitly in __del__.
              tmp = tempfile.NamedTemporaryFile(delete=False)
              tmp.close()
              self._TMP_FILES[name] = tmp

      def __del__(self):
          """Remove the temporary files, ignoring any that are already gone."""
          for name in self._TMP_FILE_NAMES:
              try:
                  os.remove(self._TMP_FILES[name].name)
              except (IOError, OSError, KeyError, AttributeError):
                  # AttributeError: the instance never got as far as
                  # assigning _TMP_FILES.
                  pass

      def _save_cookies(self, url):
          """Write the downloader's cookies as JSON to the cookies temp file.

          Cookies without a path get ``/``; cookies without a domain get the
          network location of *url*.
          """
          cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar)
          for cookie in cookies:
              if 'path' not in cookie:
                  cookie['path'] = '/'
              if 'domain' not in cookie:
                  cookie['domain'] = compat_urlparse.urlparse(url).netloc
          with open(self._TMP_FILES['cookies'].name, 'wb') as f:
              f.write(json.dumps(cookies).encode('utf-8'))

      def _load_cookies(self):
          """Read the cookies temp file and pass every cookie to the extractor."""
          with open(self._TMP_FILES['cookies'].name, 'rb') as f:
              cookies = json.loads(f.read().decode('utf-8'))
          for cookie in cookies:
              # .get(): an entry without the key is treated as "not HttpOnly"
              # instead of aborting with KeyError.
              if cookie.get('httponly') is True:
                  cookie['rest'] = {'httpOnly': None}
              if 'expiry' in cookie:
                  cookie['expire_time'] = cookie['expiry']
              self.extractor._set_cookie(**compat_kwargs(cookie))

      def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers=None, jscode='saveAndExit();'):
          """
          Downloads webpage (if needed) and executes JS

          Params:
              url: website url
              html: optional, html code of website
              video_id: video id
              note: optional, displayed when downloading webpage
              note2: optional, displayed when executing JS
              headers: optional, custom http headers (defaults to none)
              jscode: code to be executed when page is loaded

          Returns tuple with:
              * downloaded website (after JS execution)
              * anything you print with `console.log` (but not inside `page.execute`!)

          Raises ExtractorError when `jscode` lacks `saveAndExit();` or when
          PhantomJS exits with a non-zero return code.

          In most cases you don't need to add any `jscode`.
          It is executed in `page.onLoadFinished`.
          `saveAndExit();` is mandatory, use it instead of `phantom.exit()`

          It is possible to wait for some element on the webpage, for example:
              var check = function() {
                var elementFound = page.evaluate(function() {
                  return document.querySelector('#b.done') !== null;
                });
                if(elementFound)
                  saveAndExit();
                else
                  window.setTimeout(check, 500);
              }

              page.evaluate(function(){
                document.querySelector('#a').click();
              });
              check();
          """
          if 'saveAndExit();' not in jscode:
              raise ExtractorError('`saveAndExit();` not found in `jscode`')
          if headers is None:
              headers = {}
          if not html:
              html = self.extractor._download_webpage(url, video_id, note=note, headers=headers)
          with open(self._TMP_FILES['html'].name, 'wb') as f:
              f.write(html.encode('utf-8'))

          self._save_cookies(url)

          # Work on a copy so per-call values do not accumulate in self.options.
          replaces = dict(self.options)
          # url, ua and the file names all land inside double-quoted JS string
          # literals in the template, so they share the same escaping.
          replaces['url'] = self._js_escape(url)
          user_agent = headers.get('User-Agent') or std_headers['User-Agent']
          replaces['ua'] = self._js_escape(user_agent)
          replaces['jscode'] = jscode

          for x in self._TMP_FILE_NAMES:
              replaces[x] = self._js_escape(self._TMP_FILES[x].name)

          with open(self._TMP_FILES['script'].name, 'wb') as f:
              f.write(self._TEMPLATE.format(**replaces).encode('utf-8'))

          if video_id is None:
              self.extractor.to_screen('%s' % (note2,))
          else:
              self.extractor.to_screen('%s: %s' % (video_id, note2))

          p = subprocess.Popen([
              self.exe, '--ssl-protocol=any',
              self._TMP_FILES['script'].name
          ], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
          out, err = p.communicate()
          if p.returncode != 0:
              raise ExtractorError(
                  'Executing JS failed\n:' + encodeArgument(err))
          # The script's saveAndExit() rewrote the html and cookies files.
          with open(self._TMP_FILES['html'].name, 'rb') as f:
              html = f.read().decode('utf-8')

          self._load_cookies()

          return (html, encodeArgument(out))
  class OpenloadIE(InfoExtractor):
      """Extractor for videos served from the hosts listed in ``_DOMAINS``.

      The stream id is only present in the page after its JavaScript has run,
      so the downloaded page is passed through ``PhantomJSwrapper``.
      """

      # Alternation of every host name accepted by this extractor.
      _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw|live|space)|oladblock\.(?:services|xyz|me)|openloed\.co)'

      _VALID_URL = r'''(?x)
                      https?://
                          (?P<host>
                              (?:www\.)?
                              %s
                          )/
                          (?:f|embed)/
                          (?P<id>[a-zA-Z0-9-_]+)
                      ''' % _DOMAINS

      _TESTS = [{
          'url': 'https://openload.co/f/kUEfGclsU9o',
          'md5': 'bf1c059b004ebc7a256f89408e65c36e',
          'info_dict': {
              'id': 'kUEfGclsU9o',
              'ext': 'mp4',
              'title': 'skyrim_no-audio_1080.mp4',
              'thumbnail': r're:^https?://.*\.jpg$',
          },
      }, {
          'url': 'https://openload.co/embed/rjC09fkPLYs',
          'info_dict': {
              'id': 'rjC09fkPLYs',
              'ext': 'mp4',
              'title': 'movie.mp4',
              'thumbnail': r're:^https?://.*\.jpg$',
              'subtitles': {
                  'en': [{
                      'ext': 'vtt',
                  }],
              },
          },
          'params': {
              'skip_download': True,  # test subtitles only
          },
      }, {
          'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4',
          'only_matching': True,
      }, {
          'url': 'https://openload.io/f/ZAn6oz-VZGE/',
          'only_matching': True,
      }, {
          'url': 'https://openload.co/f/_-ztPaZtMhM/',
          'only_matching': True,
      }, {
          # unavailable via https://openload.co/f/Sxz5sADo82g/, different layout
          # for title and ext
          'url': 'https://openload.co/embed/Sxz5sADo82g/',
          'only_matching': True,
      }, {
          # unavailable via https://openload.co/embed/e-Ixz9ZR5L0/ but available
          # via https://openload.co/f/e-Ixz9ZR5L0/
          'url': 'https://openload.co/f/e-Ixz9ZR5L0/',
          'only_matching': True,
      }, {
          'url': 'https://oload.tv/embed/KnG-kKZdcfY/',
          'only_matching': True,
      }, {
          'url': 'http://www.openload.link/f/KnG-kKZdcfY',
          'only_matching': True,
      }, {
          'url': 'https://oload.stream/f/KnG-kKZdcfY',
          'only_matching': True,
      }, {
          'url': 'https://oload.xyz/f/WwRBpzW8Wtk',
          'only_matching': True,
      }, {
          'url': 'https://oload.win/f/kUEfGclsU9o',
          'only_matching': True,
      }, {
          'url': 'https://oload.download/f/kUEfGclsU9o',
          'only_matching': True,
      }, {
          'url': 'https://oload.cloud/f/4ZDnBXRWiB8',
          'only_matching': True,
      }, {
          # Its title has not got its extension but url has it
          'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4',
          'only_matching': True,
      }, {
          'url': 'https://oload.cc/embed/5NEAbI2BDSk',
          'only_matching': True,
      }, {
          'url': 'https://oload.icu/f/-_i4y_F_Hs8',
          'only_matching': True,
      }, {
          'url': 'https://oload.fun/f/gb6G1H4sHXY',
          'only_matching': True,
      }, {
          'url': 'https://oload.club/f/Nr1L-aZ2dbQ',
          'only_matching': True,
      }, {
          'url': 'https://oload.info/f/5NEAbI2BDSk',
          'only_matching': True,
      }, {
          'url': 'https://openload.pw/f/WyKgK8s94N0',
          'only_matching': True,
      }, {
          'url': 'https://oload.pw/f/WyKgK8s94N0',
          'only_matching': True,
      }, {
          'url': 'https://oload.live/f/-Z58UZ-GR4M',
          'only_matching': True,
      }, {
          'url': 'https://oload.space/f/IY4eZSst3u8/',
          'only_matching': True,
      }, {
          'url': 'https://oladblock.services/f/b8NWEgkqNLI/',
          'only_matching': True,
      }, {
          'url': 'https://oladblock.xyz/f/b8NWEgkqNLI/',
          'only_matching': True,
      }, {
          'url': 'https://oladblock.me/f/b8NWEgkqNLI/',
          'only_matching': True,
      }, {
          'url': 'https://openloed.co/f/b8NWEgkqNLI/',
          'only_matching': True,
      }]

      # ``str.format`` template; the fields are filled with random numbers
      # for every extraction.
      _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{major}.0.{build}.{patch} Safari/537.36'

      @staticmethod
      def _extract_urls(webpage):
          """Return the embed URLs found in ``<iframe src=...>`` attributes of *webpage*."""
          return re.findall(
              r'<iframe[^>]+src=["\']((?:https?://)?%s/embed/[a-zA-Z0-9-_]+)'
              % OpenloadIE._DOMAINS, webpage)

      def _real_extract(self, url):
          """Build the info dict for the video addressed by *url*.

          The ``embed`` page is tried first and the ``f`` page second; only a
          failure on the second one is fatal. Raises ExtractorError
          ('File not found') when the last page tried reports the file as
          missing or deleted.
          """
          mobj = re.match(self._VALID_URL, url)
          host = mobj.group('host')
          video_id = mobj.group('id')

          # '%%s' survives this formatting step as '%s' and is filled with the
          # page kind ('embed' or 'f') inside the loop.
          url_pattern = 'https://%s/%%s/%s/' % (host, video_id)
          headers = {
              # The template uses ``{name}`` fields, so it must be rendered
              # with ``str.format``; the ``%`` operator would leave the
              # placeholders in the header verbatim.
              'User-Agent': self._USER_AGENT_TPL.format(
                  major=random.randint(63, 73),
                  build=random.randint(3239, 3683),
                  patch=random.randint(0, 100)),
          }

          for path in ('embed', 'f'):
              page_url = url_pattern % path
              last = path == 'f'
              webpage = self._download_webpage(
                  page_url, video_id, 'Downloading %s webpage' % path,
                  headers=headers, fatal=last)
              if not webpage:
                  continue
              if 'File not found' in webpage or 'deleted by the owner' in webpage:
                  if not last:
                      continue
                  raise ExtractorError('File not found', expected=True, video_id=video_id)
              break

          # Let PhantomJS execute the page scripts; the returned HTML replaces
          # the downloaded one for all lookups below.
          phantom = PhantomJSwrapper(self, required_version='2.0')
          webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id, headers=headers)

          # Known element ids first, then progressively looser text patterns.
          decoded_id = (get_element_by_id('streamurl', webpage) or
                        get_element_by_id('streamuri', webpage) or
                        get_element_by_id('streamurj', webpage) or
                        self._search_regex(
                            (r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<',
                             r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)',
                             r'>\s*([\w-]+~\d{10,}~(?:[a-f\d]+:){2}:~[\w-]+)\s*<',
                             r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)\s*<',
                             r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage,
                            'stream URL'))

          video_url = 'https://%s/stream/%s?mime=true' % (host, decoded_id)

          # Title fallbacks: og:title, then the title <span>, then the
          # description meta tag (fatal when that is missing too).
          title = self._og_search_title(webpage, default=None) or self._search_regex(
              r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
              'title', default=None) or self._html_search_meta(
              'description', webpage, 'title', fatal=True)

          entries = self._parse_html5_media_entries(page_url, webpage, video_id)
          entry = entries[0] if entries else {}

          subtitles = entry.get('subtitles')

          return {
              'id': video_id,
              'title': title,
              'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
              'url': video_url,
              # Prefer the extension in the title, then the one in the URL.
              'ext': determine_ext(title, None) or determine_ext(url, 'mp4'),
              'subtitles': subtitles,
              'http_headers': headers,
          }
  386. 'url': video_url,
  387. 'ext': determine_ext(title, None) or determine_ext(url, 'mp4'),
  388. 'subtitles': subtitles,
  389. 'http_headers': headers,
  390. }