You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

140 lines
5.3 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. clean_html,
  6. int_or_none,
  7. )
  8. class EllenTubeIE(InfoExtractor):
  9. _VALID_URL = r'''(?x)
  10. (?:
  11. https://api-prod\.ellentube\.com/ellenapi/api/item/
  12. |ellentube:
  13. )
  14. (?P<id>
  15. [\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}
  16. )'''
  17. _TESTS = [{
  18. 'url': 'https://api-prod.ellentube.com/ellenapi/api/item/75c64c16-aefd-4558-b4f5-3de09b22e6fc',
  19. 'match_only': True,
  20. }, {
  21. 'url': 'ellentube:734a3353-f697-4e79-9ca9-bfc3002dc1e0',
  22. 'match_only': True,
  23. }]
  24. def _real_extract(self, url):
  25. video_id = self._match_id(url)
  26. data = self._download_json(
  27. 'https://api-prod.ellentube.com/ellenapi/api/item/%s' % video_id, video_id)
  28. title = data['title']
  29. description = data.get('description')
  30. publish_time = int_or_none(data.get('publishTime'))
  31. thumbnail = data.get('thumbnail')
  32. formats = []
  33. duration = None
  34. for entry in data.get('media'):
  35. if entry.get('id') == 'm3u8':
  36. formats = self._extract_m3u8_formats(
  37. entry.get('url'), video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
  38. duration = int_or_none(entry.get('duration'))
  39. break
  40. self._sort_formats(formats)
  41. return {
  42. 'id': video_id,
  43. 'title': title,
  44. 'description': description,
  45. 'duration': duration,
  46. 'thumbnail': thumbnail,
  47. 'timestamp': publish_time,
  48. 'formats': formats,
  49. }
  50. class EllenTubeVideoIE(InfoExtractor):
  51. _VALID_URL = r'https?://(?:www\.)?ellentube\.com/video/(?P<id>.+)\.html'
  52. _TEST = {
  53. 'url': 'https://www.ellentube.com/video/ellen-meets-las-vegas-survivors-jesus-campos-and-stephen-schuck.html',
  54. 'md5': '2fabc277131bddafdd120e0fc0f974c9',
  55. 'info_dict': {
  56. 'id': '0822171c-3829-43bf-b99f-d77358ae75e3',
  57. 'ext': 'mp4',
  58. 'title': 'Ellen Meets Las Vegas Survivors Jesus Campos and Stephen Schuck',
  59. 'description': 'md5:76e3355e2242a78ad9e3858e5616923f',
  60. 'duration': 514,
  61. 'timestamp': 1508505120000,
  62. 'thumbnail': 'https://warnerbros-h.assetsadobe.com/is/image/content/dam/ellen/videos/episodes/season15/32/video--2728751654987218111',
  63. }
  64. }
  65. def _real_extract(self, url):
  66. display_id = self._match_id(url)
  67. webpage = self._download_webpage(url, display_id)
  68. video_id = self._html_search_regex(
  69. r'(?s)<!--\s*CONTENT\s*-->.*data-config.+([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
  70. webpage, 'video id')
  71. return self.url_result('ellentube:%s' % video_id, 'EllenTube')
  72. class EllenTubePlaylistIE(InfoExtractor):
  73. def _extract_videos_from_json(self, data, display_id):
  74. return [self.url_result('ellentube:%s' % elem['id'], 'EllenTube')
  75. for elem in data if elem.get('type') == 'VIDEO']
  76. def _extract_playlist(self, url, display_id, extract_description=True):
  77. webpage = self._download_webpage(url, display_id)
  78. playlist_data = self._html_search_regex(
  79. r'<div\s+data-component\s*=\s*"Details"(.+)</div>', webpage, 'playlist data')
  80. playlist_title = self._search_regex(
  81. r'"title"\s*:\s*"(.+?)"', playlist_data, 'playlist title')
  82. playlist_description = clean_html(self._search_regex(
  83. r'"description"\s*:\s*"(.+?)"', playlist_data, 'playlist description',
  84. fatal=False)) if extract_description else None
  85. api_search = self._search_regex(
  86. r'"filter"\s*:\s*"(.+?)"', playlist_data, 'playlist api request')
  87. api_data = self._download_json(
  88. 'https://api-prod.ellentube.com/ellenapi/api/feed/?%s' % api_search,
  89. display_id)
  90. return self.playlist_result(
  91. self._extract_videos_from_json(api_data, display_id),
  92. display_id, playlist_title, playlist_description)
  93. class EllenTubeEpisodeIE(EllenTubePlaylistIE):
  94. _VALID_URL = r'https?://(?:www\.)?ellentube\.com/episode/(?P<id>.+)\.html'
  95. _TEST = {
  96. 'url': 'https://www.ellentube.com/episode/dax-shepard-jordan-fisher-haim.html',
  97. 'info_dict': {
  98. 'id': 'dax-shepard-jordan-fisher-haim',
  99. 'title': 'Dax Shepard, \'DWTS\' Team Jordan Fisher & Lindsay Arnold, HAIM',
  100. 'description': 'md5:aed85d42892f6126e71ec5ed2aea2a0d'
  101. },
  102. 'playlist_count': 6,
  103. }
  104. def _real_extract(self, url):
  105. display_id = self._match_id(url)
  106. return self._extract_playlist(url, display_id)
  107. class EllenTubeStudioIE(EllenTubePlaylistIE):
  108. _VALID_URL = r'https?://(?:www\.)?ellentube\.com/studios/(?P<id>.+)\.html'
  109. _TEST = {
  110. 'url': 'https://www.ellentube.com/studios/macey-goes-rving0.html',
  111. 'info_dict': {
  112. 'id': 'macey-goes-rving0',
  113. 'title': 'Macey Goes RVing',
  114. },
  115. 'playlist_mincount': 3,
  116. }
  117. def _real_extract(self, url):
  118. display_id = self._match_id(url)
  119. return self._extract_playlist(url, display_id, False)