You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

183 lines
6.6 KiB

11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
  1. from __future__ import unicode_literals
  2. import datetime
  3. import json
  4. import re
  5. import socket
  6. from .common import InfoExtractor
  7. from .subtitles import SubtitlesInfoExtractor
  8. from ..utils import (
  9. compat_http_client,
  10. compat_str,
  11. compat_urllib_error,
  12. compat_urllib_request,
  13. ExtractorError,
  14. unescapeHTML,
  15. )
  16. class BlipTVIE(SubtitlesInfoExtractor):
  17. """Information extractor for blip.tv"""
  18. _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(?P<presumptive_id>.+)$'
  19. _TESTS = [{
  20. 'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
  21. 'md5': 'c6934ad0b6acf2bd920720ec888eb812',
  22. 'info_dict': {
  23. 'id': '5779306',
  24. 'ext': 'mov',
  25. 'upload_date': '20111205',
  26. 'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
  27. 'uploader': 'Comic Book Resources - CBR TV',
  28. 'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
  29. }
  30. }, {
  31. # https://github.com/rg3/youtube-dl/pull/2274
  32. 'note': 'Video with subtitles',
  33. 'url': 'http://blip.tv/play/h6Uag5OEVgI.html',
  34. 'md5': '309f9d25b820b086ca163ffac8031806',
  35. 'info_dict': {
  36. 'id': '6586561',
  37. 'ext': 'mp4',
  38. 'uploader': 'Red vs. Blue',
  39. 'description': 'One-Zero-One',
  40. 'upload_date': '20130614',
  41. 'title': 'Red vs. Blue Season 11 Episode 1',
  42. }
  43. }]
  44. def _real_extract(self, url):
  45. mobj = re.match(self._VALID_URL, url)
  46. presumptive_id = mobj.group('presumptive_id')
  47. # See https://github.com/rg3/youtube-dl/issues/857
  48. embed_mobj = re.match(r'https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url)
  49. if embed_mobj:
  50. info_url = 'http://blip.tv/play/%s.x?p=1' % embed_mobj.group(1)
  51. info_page = self._download_webpage(info_url, embed_mobj.group(1))
  52. video_id = self._search_regex(
  53. r'data-episode-id="([0-9]+)', info_page, 'video_id')
  54. return self.url_result('http://blip.tv/a/a-' + video_id, 'BlipTV')
  55. cchar = '&' if '?' in url else '?'
  56. json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
  57. request = compat_urllib_request.Request(json_url)
  58. request.add_header('User-Agent', 'iTunes/10.6.1')
  59. json_data = self._download_json(request, video_id=presumptive_id)
  60. if 'Post' in json_data:
  61. data = json_data['Post']
  62. else:
  63. data = json_data
  64. video_id = compat_str(data['item_id'])
  65. upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
  66. subtitles = {}
  67. formats = []
  68. if 'additionalMedia' in data:
  69. for f in data['additionalMedia']:
  70. if f.get('file_type_srt') == 1:
  71. LANGS = {
  72. 'english': 'en',
  73. }
  74. lang = f['role'].rpartition('-')[-1].strip().lower()
  75. langcode = LANGS.get(lang, lang)
  76. subtitles[langcode] = f['url']
  77. continue
  78. if not int(f['media_width']): # filter m3u8
  79. continue
  80. formats.append({
  81. 'url': f['url'],
  82. 'format_id': f['role'],
  83. 'width': int(f['media_width']),
  84. 'height': int(f['media_height']),
  85. })
  86. else:
  87. formats.append({
  88. 'url': data['media']['url'],
  89. 'width': int(data['media']['width']),
  90. 'height': int(data['media']['height']),
  91. })
  92. self._sort_formats(formats)
  93. # subtitles
  94. video_subtitles = self.extract_subtitles(video_id, subtitles)
  95. if self._downloader.params.get('listsubtitles', False):
  96. self._list_available_subtitles(video_id, subtitles)
  97. return
  98. return {
  99. 'id': video_id,
  100. 'uploader': data['display_name'],
  101. 'upload_date': upload_date,
  102. 'title': data['title'],
  103. 'thumbnail': data['thumbnailUrl'],
  104. 'description': data['description'],
  105. 'user_agent': 'iTunes/10.6.1',
  106. 'formats': formats,
  107. 'subtitles': video_subtitles,
  108. }
  109. def _download_subtitle_url(self, sub_lang, url):
  110. # For some weird reason, blip.tv serves a video instead of subtitles
  111. # when we request with a common UA
  112. req = compat_urllib_request.Request(url)
  113. req.add_header('Youtubedl-user-agent', 'youtube-dl')
  114. return self._download_webpage(req, None, note=False)
  115. class BlipTVUserIE(InfoExtractor):
  116. _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
  117. _PAGE_SIZE = 12
  118. IE_NAME = 'blip.tv:user'
  119. def _real_extract(self, url):
  120. mobj = re.match(self._VALID_URL, url)
  121. username = mobj.group(1)
  122. page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'
  123. page = self._download_webpage(url, username, 'Downloading user page')
  124. mobj = re.search(r'data-users-id="([^"]+)"', page)
  125. page_base = page_base % mobj.group(1)
  126. # Download video ids using BlipTV Ajax calls. Result size per
  127. # query is limited (currently to 12 videos) so we need to query
  128. # page by page until there are no video ids - it means we got
  129. # all of them.
  130. video_ids = []
  131. pagenum = 1
  132. while True:
  133. url = page_base + "&page=" + str(pagenum)
  134. page = self._download_webpage(
  135. url, username, 'Downloading video ids from page %d' % pagenum)
  136. # Extract video identifiers
  137. ids_in_page = []
  138. for mobj in re.finditer(r'href="/([^"]+)"', page):
  139. if mobj.group(1) not in ids_in_page:
  140. ids_in_page.append(unescapeHTML(mobj.group(1)))
  141. video_ids.extend(ids_in_page)
  142. # A little optimization - if current page is not
  143. # "full", ie. does not contain PAGE_SIZE video ids then
  144. # we can assume that this page is the last one - there
  145. # are no more ids on further pages - no need to query
  146. # again.
  147. if len(ids_in_page) < self._PAGE_SIZE:
  148. break
  149. pagenum += 1
  150. urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
  151. url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
  152. return [self.playlist_result(url_entries, playlist_title=username)]