You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

148 lines
5.5 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. ExtractorError,
  7. int_or_none,
  8. strip_or_none,
  9. unescapeHTML,
  10. urlencode_postdata,
  11. )
  12. class RoosterTeethIE(InfoExtractor):
  13. _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/episode/(?P<id>[^/?#&]+)'
  14. _LOGIN_URL = 'https://roosterteeth.com/login'
  15. _NETRC_MACHINE = 'roosterteeth'
  16. _TESTS = [{
  17. 'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
  18. 'md5': 'e2bd7764732d785ef797700a2489f212',
  19. 'info_dict': {
  20. 'id': '26576',
  21. 'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement',
  22. 'ext': 'mp4',
  23. 'title': 'Million Dollars, But...: Million Dollars, But... The Game Announcement',
  24. 'description': 'md5:0cc3b21986d54ed815f5faeccd9a9ca5',
  25. 'thumbnail': 're:^https?://.*\.png$',
  26. 'series': 'Million Dollars, But...',
  27. 'episode': 'Million Dollars, But... The Game Announcement',
  28. 'comment_count': int,
  29. },
  30. }, {
  31. 'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31',
  32. 'only_matching': True,
  33. }, {
  34. 'url': 'http://funhaus.roosterteeth.com/episode/funhaus-shorts-2016-austin-sucks-funhaus-shorts',
  35. 'only_matching': True,
  36. }, {
  37. 'url': 'http://screwattack.roosterteeth.com/episode/death-battle-season-3-mewtwo-vs-shadow',
  38. 'only_matching': True,
  39. }, {
  40. 'url': 'http://theknow.roosterteeth.com/episode/the-know-game-news-season-1-boring-steam-sales-are-better',
  41. 'only_matching': True,
  42. }, {
  43. # only available for FIRST members
  44. 'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one',
  45. 'only_matching': True,
  46. }]
  47. def _login(self):
  48. (username, password) = self._get_login_info()
  49. if username is None:
  50. return
  51. login_page = self._download_webpage(
  52. self._LOGIN_URL, None,
  53. note='Downloading login page',
  54. errnote='Unable to download login page')
  55. login_form = self._hidden_inputs(login_page)
  56. login_form.update({
  57. 'username': username,
  58. 'password': password,
  59. })
  60. login_request = self._download_webpage(
  61. self._LOGIN_URL, None,
  62. note='Logging in as %s' % username,
  63. data=urlencode_postdata(login_form),
  64. headers={
  65. 'Referer': self._LOGIN_URL,
  66. })
  67. if not any(re.search(p, login_request) for p in (
  68. r'href=["\']https?://(?:www\.)?roosterteeth\.com/logout"',
  69. r'>Sign Out<')):
  70. error = self._html_search_regex(
  71. r'(?s)<div[^>]+class=(["\']).*?\balert-danger\b.*?\1[^>]*>(?:\s*<button[^>]*>.*?</button>)?(?P<error>.+?)</div>',
  72. login_request, 'alert', default=None, group='error')
  73. if error:
  74. raise ExtractorError('Unable to login: %s' % error, expected=True)
  75. raise ExtractorError('Unable to log in')
  76. def _real_initialize(self):
  77. self._login()
  78. def _real_extract(self, url):
  79. display_id = self._match_id(url)
  80. webpage = self._download_webpage(url, display_id)
  81. episode = strip_or_none(unescapeHTML(self._search_regex(
  82. (r'videoTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
  83. r'<title>(?P<title>[^<]+)</title>'), webpage, 'title',
  84. default=None, group='title')))
  85. title = strip_or_none(self._og_search_title(
  86. webpage, default=None)) or episode
  87. m3u8_url = self._search_regex(
  88. r'file\s*:\s*(["\'])(?P<url>http.+?\.m3u8.*?)\1',
  89. webpage, 'm3u8 url', default=None, group='url')
  90. if not m3u8_url:
  91. if re.search(r'<div[^>]+class=["\']non-sponsor', webpage):
  92. self.raise_login_required(
  93. '%s is only available for FIRST members' % display_id)
  94. if re.search(r'<div[^>]+class=["\']golive-gate', webpage):
  95. self.raise_login_required('%s is not available yet' % display_id)
  96. raise ExtractorError('Unable to extract m3u8 URL')
  97. formats = self._extract_m3u8_formats(
  98. m3u8_url, display_id, ext='mp4',
  99. entry_protocol='m3u8_native', m3u8_id='hls')
  100. self._sort_formats(formats)
  101. description = strip_or_none(self._og_search_description(webpage))
  102. thumbnail = self._proto_relative_url(self._og_search_thumbnail(webpage))
  103. series = self._search_regex(
  104. (r'<h2>More ([^<]+)</h2>', r'<a[^>]+>See All ([^<]+) Videos<'),
  105. webpage, 'series', fatal=False)
  106. comment_count = int_or_none(self._search_regex(
  107. r'>Comments \((\d+)\)<', webpage,
  108. 'comment count', fatal=False))
  109. video_id = self._search_regex(
  110. (r'containerId\s*=\s*["\']episode-(\d+)\1',
  111. r'<div[^<]+id=["\']episode-(\d+)'), webpage,
  112. 'video id', default=display_id)
  113. return {
  114. 'id': video_id,
  115. 'display_id': display_id,
  116. 'title': title,
  117. 'description': description,
  118. 'thumbnail': thumbnail,
  119. 'series': series,
  120. 'episode': episode,
  121. 'comment_count': comment_count,
  122. 'formats': formats,
  123. }