You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

154 lines
5.4 KiB

  1. from __future__ import unicode_literals
  2. import re
  3. from .common import InfoExtractor
  4. from ..compat import (
  5. compat_urlparse,
  6. )
  7. from ..utils import (
  8. ExtractorError,
  9. int_or_none,
  10. float_or_none,
  11. )
  12. class UstreamIE(InfoExtractor):
  13. _VALID_URL = r'https?://(?:www\.)?ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)'
  14. IE_NAME = 'ustream'
  15. _TESTS = [{
  16. 'url': 'http://www.ustream.tv/recorded/20274954',
  17. 'md5': '088f151799e8f572f84eb62f17d73e5c',
  18. 'info_dict': {
  19. 'id': '20274954',
  20. 'ext': 'flv',
  21. 'title': 'Young Americans for Liberty February 7, 2012 2:28 AM',
  22. 'description': 'Young Americans for Liberty February 7, 2012 2:28 AM',
  23. 'timestamp': 1328577035,
  24. 'upload_date': '20120207',
  25. 'uploader': 'yaliberty',
  26. 'uploader_id': '6780869',
  27. },
  28. }, {
  29. # From http://sportscanada.tv/canadagames/index.php/week2/figure-skating/444
  30. # Title and uploader available only from params JSON
  31. 'url': 'http://www.ustream.tv/embed/recorded/59307601?ub=ff0000&lc=ff0000&oc=ffffff&uc=ffffff&v=3&wmode=direct',
  32. 'md5': '5a2abf40babeac9812ed20ae12d34e10',
  33. 'info_dict': {
  34. 'id': '59307601',
  35. 'ext': 'flv',
  36. 'title': '-CG11- Canada Games Figure Skating',
  37. 'uploader': 'sportscanadatv',
  38. },
  39. 'skip': 'This Pro Broadcaster has chosen to remove this video from the ustream.tv site.',
  40. }, {
  41. 'url': 'http://www.ustream.tv/embed/10299409',
  42. 'info_dict': {
  43. 'id': '10299409',
  44. },
  45. 'playlist_count': 3,
  46. }]
  47. def _real_extract(self, url):
  48. m = re.match(self._VALID_URL, url)
  49. video_id = m.group('id')
  50. # some sites use this embed format (see: https://github.com/rg3/youtube-dl/issues/2990)
  51. if m.group('type') == 'embed/recorded':
  52. video_id = m.group('id')
  53. desktop_url = 'http://www.ustream.tv/recorded/' + video_id
  54. return self.url_result(desktop_url, 'Ustream')
  55. if m.group('type') == 'embed':
  56. video_id = m.group('id')
  57. webpage = self._download_webpage(url, video_id)
  58. content_video_ids = self._parse_json(self._search_regex(
  59. r'ustream\.vars\.offAirContentVideoIds=([^;]+);', webpage,
  60. 'content video IDs'), video_id)
  61. return self.playlist_result(
  62. map(lambda u: self.url_result('http://www.ustream.tv/recorded/' + u, 'Ustream'), content_video_ids),
  63. video_id)
  64. params = self._download_json(
  65. 'https://api.ustream.tv/videos/%s.json' % video_id, video_id)
  66. error = params.get('error')
  67. if error:
  68. raise ExtractorError(
  69. '%s returned error: %s' % (self.IE_NAME, error), expected=True)
  70. video = params['video']
  71. title = video['title']
  72. filesize = float_or_none(video.get('file_size'))
  73. formats = [{
  74. 'id': video_id,
  75. 'url': video_url,
  76. 'ext': format_id,
  77. 'filesize': filesize,
  78. } for format_id, video_url in video['media_urls'].items()]
  79. self._sort_formats(formats)
  80. description = video.get('description')
  81. timestamp = int_or_none(video.get('created_at'))
  82. duration = float_or_none(video.get('length'))
  83. view_count = int_or_none(video.get('views'))
  84. uploader = video.get('owner', {}).get('username')
  85. uploader_id = video.get('owner', {}).get('id')
  86. thumbnails = [{
  87. 'id': thumbnail_id,
  88. 'url': thumbnail_url,
  89. } for thumbnail_id, thumbnail_url in video.get('thumbnail', {}).items()]
  90. return {
  91. 'id': video_id,
  92. 'title': title,
  93. 'description': description,
  94. 'thumbnails': thumbnails,
  95. 'timestamp': timestamp,
  96. 'duration': duration,
  97. 'view_count': view_count,
  98. 'uploader': uploader,
  99. 'uploader_id': uploader_id,
  100. 'formats': formats,
  101. }
  102. class UstreamChannelIE(InfoExtractor):
  103. _VALID_URL = r'https?://(?:www\.)?ustream\.tv/channel/(?P<slug>.+)'
  104. IE_NAME = 'ustream:channel'
  105. _TEST = {
  106. 'url': 'http://www.ustream.tv/channel/channeljapan',
  107. 'info_dict': {
  108. 'id': '10874166',
  109. },
  110. 'playlist_mincount': 17,
  111. }
  112. def _real_extract(self, url):
  113. m = re.match(self._VALID_URL, url)
  114. display_id = m.group('slug')
  115. webpage = self._download_webpage(url, display_id)
  116. channel_id = self._html_search_meta('ustream:channel_id', webpage)
  117. BASE = 'http://www.ustream.tv'
  118. next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id
  119. video_ids = []
  120. while next_url:
  121. reply = self._download_json(
  122. compat_urlparse.urljoin(BASE, next_url), display_id,
  123. note='Downloading video information (next: %d)' % (len(video_ids) + 1))
  124. video_ids.extend(re.findall(r'data-content-id="(\d.*)"', reply['data']))
  125. next_url = reply['nextUrl']
  126. entries = [
  127. self.url_result('http://www.ustream.tv/recorded/' + vid, 'Ustream')
  128. for vid in video_ids]
  129. return {
  130. '_type': 'playlist',
  131. 'id': channel_id,
  132. 'display_id': display_id,
  133. 'entries': entries,
  134. }