You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

124 lines
4.4 KiB

  1. from __future__ import unicode_literals
  2. from .common import InfoExtractor
  3. from ..compat import (
  4. compat_urllib_parse,
  5. compat_urllib_request,
  6. )
  7. from ..utils import (
  8. ExtractorError,
  9. js_to_json,
  10. )
  11. class EscapistIE(InfoExtractor):
  12. _VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
  13. _USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
  14. _TEST = {
  15. 'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
  16. 'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
  17. 'info_dict': {
  18. 'id': '6618',
  19. 'ext': 'mp4',
  20. 'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
  21. 'uploader_id': 'the-escapist-presents',
  22. 'uploader': 'The Escapist Presents',
  23. 'title': "Breaking Down Baldur's Gate",
  24. 'thumbnail': 're:^https?://.*\.jpg$',
  25. }
  26. }
  27. def _real_extract(self, url):
  28. video_id = self._match_id(url)
  29. webpage_req = compat_urllib_request.Request(url)
  30. webpage_req.add_header('User-Agent', self._USER_AGENT)
  31. webpage = self._download_webpage(webpage_req, video_id)
  32. uploader_id = self._html_search_regex(
  33. r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'",
  34. webpage, 'uploader ID', fatal=False)
  35. uploader = self._html_search_regex(
  36. r"<h1\s+class='headline'>(.*?)</a>",
  37. webpage, 'uploader', fatal=False)
  38. description = self._html_search_meta('description', webpage)
  39. raw_title = self._html_search_meta('title', webpage, fatal=True)
  40. title = raw_title.partition(' : ')[2]
  41. config_url = compat_urllib_parse.unquote(self._html_search_regex(
  42. r'''(?x)
  43. (?:
  44. <param\s+name="flashvars".*?\s+value="config=|
  45. flashvars=&quot;config=
  46. )
  47. (https?://[^"&]+)
  48. ''',
  49. webpage, 'config URL'))
  50. formats = []
  51. ad_formats = []
  52. def _add_format(name, cfg_url, quality):
  53. cfg_req = compat_urllib_request.Request(cfg_url)
  54. cfg_req.add_header('User-Agent', self._USER_AGENT)
  55. config = self._download_json(
  56. cfg_req, video_id,
  57. 'Downloading ' + name + ' configuration',
  58. 'Unable to download ' + name + ' configuration',
  59. transform_source=js_to_json)
  60. playlist = config['playlist']
  61. for p in playlist:
  62. if p.get('eventCategory') == 'Video':
  63. ar = formats
  64. elif p.get('eventCategory') == 'Video Postroll':
  65. ar = ad_formats
  66. else:
  67. continue
  68. ar.append({
  69. 'url': p['url'],
  70. 'format_id': name,
  71. 'quality': quality,
  72. 'http_headers': {
  73. 'User-Agent': self._USER_AGENT,
  74. },
  75. })
  76. _add_format('normal', config_url, quality=0)
  77. hq_url = (config_url +
  78. ('&hq=1' if '?' in config_url else config_url + '?hq=1'))
  79. try:
  80. _add_format('hq', hq_url, quality=1)
  81. except ExtractorError:
  82. pass # That's fine, we'll just use normal quality
  83. self._sort_formats(formats)
  84. if '/escapist/sales-marketing/' in formats[-1]['url']:
  85. raise ExtractorError('This IP address has been blocked by The Escapist', expected=True)
  86. res = {
  87. 'id': video_id,
  88. 'formats': formats,
  89. 'uploader': uploader,
  90. 'uploader_id': uploader_id,
  91. 'title': title,
  92. 'thumbnail': self._og_search_thumbnail(webpage),
  93. 'description': description,
  94. }
  95. if self._downloader.params.get('include_ads') and ad_formats:
  96. self._sort_formats(ad_formats)
  97. ad_res = {
  98. 'id': '%s-ad' % video_id,
  99. 'title': '%s (Postroll)' % title,
  100. 'formats': ad_formats,
  101. }
  102. return {
  103. '_type': 'playlist',
  104. 'entries': [res, ad_res],
  105. 'title': title,
  106. 'id': video_id,
  107. }
  108. return res