You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

169 lines
5.9 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. xpath_text,
  7. xpath_element,
  8. int_or_none,
  9. parse_duration,
  10. urljoin,
  11. )
  12. class HBOIE(InfoExtractor):
  13. IE_NAME = 'hbo'
  14. _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?:video|embed)(?:/[^/]+)*/(?P<id>[^/?#]+)'
  15. _TEST = {
  16. 'url': 'https://www.hbo.com/video/game-of-thrones/seasons/season-8/videos/trailer',
  17. 'md5': '8126210656f433c452a21367f9ad85b3',
  18. 'info_dict': {
  19. 'id': '22113301',
  20. 'ext': 'mp4',
  21. 'title': 'Game of Thrones - Trailer',
  22. },
  23. 'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'],
  24. }
  25. _FORMATS_INFO = {
  26. 'pro7': {
  27. 'width': 1280,
  28. 'height': 720,
  29. },
  30. '1920': {
  31. 'width': 1280,
  32. 'height': 720,
  33. },
  34. 'pro6': {
  35. 'width': 768,
  36. 'height': 432,
  37. },
  38. '640': {
  39. 'width': 768,
  40. 'height': 432,
  41. },
  42. 'pro5': {
  43. 'width': 640,
  44. 'height': 360,
  45. },
  46. 'highwifi': {
  47. 'width': 640,
  48. 'height': 360,
  49. },
  50. 'high3g': {
  51. 'width': 640,
  52. 'height': 360,
  53. },
  54. 'medwifi': {
  55. 'width': 400,
  56. 'height': 224,
  57. },
  58. 'med3g': {
  59. 'width': 400,
  60. 'height': 224,
  61. },
  62. }
  63. def _real_extract(self, url):
  64. display_id = self._match_id(url)
  65. webpage = self._download_webpage(url, display_id)
  66. location_path = self._parse_json(self._html_search_regex(
  67. r'data-state="({.+?})"', webpage, 'state'), display_id)['video']['locationUrl']
  68. video_data = self._download_xml(urljoin(url, location_path), display_id)
  69. video_id = xpath_text(video_data, 'id', fatal=True)
  70. episode_title = title = xpath_text(video_data, 'title', fatal=True)
  71. series = xpath_text(video_data, 'program')
  72. if series:
  73. title = '%s - %s' % (series, title)
  74. formats = []
  75. for source in xpath_element(video_data, 'videos', 'sources', True):
  76. if source.tag == 'size':
  77. path = xpath_text(source, './/path')
  78. if not path:
  79. continue
  80. width = source.attrib.get('width')
  81. format_info = self._FORMATS_INFO.get(width, {})
  82. height = format_info.get('height')
  83. fmt = {
  84. 'url': path,
  85. 'format_id': 'http%s' % ('-%dp' % height if height else ''),
  86. 'width': format_info.get('width'),
  87. 'height': height,
  88. }
  89. rtmp = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', path)
  90. if rtmp:
  91. fmt.update({
  92. 'url': rtmp.group('url'),
  93. 'play_path': rtmp.group('playpath'),
  94. 'app': rtmp.group('app'),
  95. 'ext': 'flv',
  96. 'format_id': fmt['format_id'].replace('http', 'rtmp'),
  97. })
  98. formats.append(fmt)
  99. else:
  100. video_url = source.text
  101. if not video_url:
  102. continue
  103. if source.tag == 'tarball':
  104. formats.extend(self._extract_m3u8_formats(
  105. video_url.replace('.tar', '/base_index_w8.m3u8'),
  106. video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
  107. elif source.tag == 'hls':
  108. m3u8_formats = self._extract_m3u8_formats(
  109. video_url.replace('.tar', '/base_index.m3u8'),
  110. video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
  111. for f in m3u8_formats:
  112. if f.get('vcodec') == 'none' and not f.get('tbr'):
  113. f['tbr'] = int_or_none(self._search_regex(
  114. r'-(\d+)k/', f['url'], 'tbr', default=None))
  115. formats.extend(m3u8_formats)
  116. elif source.tag == 'dash':
  117. formats.extend(self._extract_mpd_formats(
  118. video_url.replace('.tar', '/manifest.mpd'),
  119. video_id, mpd_id='dash', fatal=False))
  120. else:
  121. format_info = self._FORMATS_INFO.get(source.tag, {})
  122. formats.append({
  123. 'format_id': 'http-%s' % source.tag,
  124. 'url': video_url,
  125. 'width': format_info.get('width'),
  126. 'height': format_info.get('height'),
  127. })
  128. self._sort_formats(formats)
  129. thumbnails = []
  130. card_sizes = xpath_element(video_data, 'titleCardSizes')
  131. if card_sizes is not None:
  132. for size in card_sizes:
  133. path = xpath_text(size, 'path')
  134. if not path:
  135. continue
  136. width = int_or_none(size.get('width'))
  137. thumbnails.append({
  138. 'id': width,
  139. 'url': path,
  140. 'width': width,
  141. })
  142. subtitles = None
  143. caption_url = xpath_text(video_data, 'captionUrl')
  144. if caption_url:
  145. subtitles = {
  146. 'en': [{
  147. 'url': caption_url,
  148. 'ext': 'ttml'
  149. }],
  150. }
  151. return {
  152. 'id': video_id,
  153. 'title': title,
  154. 'duration': parse_duration(xpath_text(video_data, 'duration/tv14')),
  155. 'series': series,
  156. 'episode': episode_title,
  157. 'formats': formats,
  158. 'thumbnails': thumbnails,
  159. 'subtitles': subtitles,
  160. }