You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

193 lines
7.3 KiB

  1. import datetime
  2. import json
  3. import os
  4. import re
  5. import socket
  6. from .common import InfoExtractor
  7. from ..utils import (
  8. compat_http_client,
  9. compat_parse_qs,
  10. compat_str,
  11. compat_urllib_error,
  12. compat_urllib_parse_urlparse,
  13. compat_urllib_request,
  14. ExtractorError,
  15. unescapeHTML,
  16. )
  class BlipTVIE(InfoExtractor):
      """Information extractor for blip.tv

      Accepts regular post URLs as well as ``/play/`` and ``api.swf#`` player
      URLs; the latter two are first rewritten to a post URL and extracted
      from there.
      """

      _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$'
      # Captures the trailing filename extension of a media URL.
      _URL_EXT = r'^.*\.([a-z0-9]+)$'
      IE_NAME = u'blip.tv'
      _TEST = {
          u'url': u'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
          u'file': u'5779306.m4v',
          u'md5': u'80baf1ec5c3d2019037c1c707d676b9f',
          u'info_dict': {
              u"upload_date": u"20111205",
              u"description": u"md5:9bc31f227219cde65e47eeec8d2dc596",
              u"uploader": u"Comic Book Resources - CBR TV",
              u"title": u"CBR EXCLUSIVE: \"Gotham City Imposters\" Bats VS Jokerz Short 3"
          }
      }

      def report_direct_download(self, title):
          """Report that the URL was answered with a video payload."""
          self.to_screen(u'%s: Direct download detected' % title)

      def _real_extract(self, url):
          """Extract the video information for a blip.tv URL.

          Returns a one-element list holding the info dictionary.

          Raises ExtractorError when the URL is invalid, when the info page
          cannot be downloaded or read, or when its JSON cannot be parsed.
          """
          mobj = re.match(self._VALID_URL, url)
          if mobj is None:
              raise ExtractorError(u'Invalid URL: %s' % url)

          # See https://github.com/rg3/youtube-dl/issues/857
          api_mobj = re.match(r'http://a\.blip\.tv/api\.swf#(?P<video_id>[\d\w]+)', url)
          if api_mobj is not None:
              url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id')

          urlp = compat_urllib_parse_urlparse(url)
          if urlp.path.startswith('/play/'):
              # The final (post-redirect) URL carries the media file in the
              # 'file' parameter of its fragment; only that URL is needed, so
              # the response is closed as soon as it has been read.
              request = compat_urllib_request.Request(url)
              response = compat_urllib_request.urlopen(request)
              try:
                  redirecturl = response.geturl()
              finally:
                  response.close()
              rurlp = compat_urllib_parse_urlparse(redirecturl)
              try:
                  file_url = compat_parse_qs(rurlp.fragment)['file'][0]
              except KeyError:
                  raise ExtractorError(
                      u'Unable to find the file id in redirect URL: %s' % redirecturl)
              file_id = file_url.rpartition('/')[2]
              url = 'http://blip.tv/a/a-' + file_id
              return self._real_extract(url)

          # Append the JSON API parameters with the right separator.
          if '?' in url:
              cchar = '&'
          else:
              cchar = '?'
          json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
          request = compat_urllib_request.Request(json_url)
          # The same User-Agent is handed on in the result as 'user_agent'.
          request.add_header('User-Agent', 'iTunes/10.6.1')
          self.report_extraction(mobj.group(1))

          info = None
          try:
              urlh = compat_urllib_request.urlopen(request)
              if urlh.headers.get('Content-Type', '').startswith('video/'):  # Direct download
                  basename = url.split('/')[-1]
                  title, ext = os.path.splitext(basename)
                  # Only byte strings need decoding; text strings (Python 3
                  # str, Python 2 unicode) are used as they are.
                  if isinstance(title, bytes):
                      title = title.decode('UTF-8')
                  ext = ext.replace('.', '')
                  self.report_direct_download(title)
                  info = {
                      'id': title,
                      'url': url,
                      'uploader': None,
                      'upload_date': None,
                      'title': title,
                      'ext': ext,
                      # Handed on still open so the payload can be read from it.
                      'urlhandle': urlh
                  }
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
              raise ExtractorError(u'ERROR: unable to download video info webpage: %s' % compat_str(err))

          if info is None:  # Regular URL
              try:
                  json_code_bytes = urlh.read()
                  json_code = json_code_bytes.decode('utf-8')
              except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                  raise ExtractorError(u'Unable to read video info webpage: %s' % compat_str(err))

              try:
                  json_data = json.loads(json_code)
                  # The payload is either the post itself or wrapped in 'Post'.
                  if 'Post' in json_data:
                      data = json_data['Post']
                  else:
                      data = json_data

                  upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')

                  # Pick the tallest alternative rendition when any is listed;
                  # an absent or empty list falls back to the main media URL.
                  additional_media = data.get('additionalMedia')
                  if additional_media:
                      formats = sorted(additional_media, key=lambda f: int(f['media_height']))
                      best_format = formats[-1]
                      video_url = best_format['url']
                  else:
                      video_url = data['media']['url']

                  umobj = re.match(self._URL_EXT, video_url)
                  if umobj is None:
                      raise ValueError('Can not determine filename extension')
                  ext = umobj.group(1)

                  info = {
                      'id': compat_str(data['item_id']),
                      'url': video_url,
                      'uploader': data['display_name'],
                      'upload_date': upload_date,
                      'title': data['title'],
                      'ext': ext,
                      'format': data['media']['mimeType'],
                      'thumbnail': data['thumbnailUrl'],
                      'description': data['description'],
                      'player_url': data['embedUrl'],
                      'user_agent': 'iTunes/10.6.1',
                  }
              except (ValueError, KeyError) as err:
                  raise ExtractorError(u'Unable to parse video information: %s' % repr(err))

          return [info]
  class BlipTVUserIE(InfoExtractor):
      """Information Extractor for blip.tv users.

      Resolves a user page (or a ``bliptvuser:`` pseudo-URL) to a playlist of
      that user's video URLs.
      """

      _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
      _PAGE_SIZE = 12
      IE_NAME = u'blip.tv:user'

      def _real_extract(self, url):
          """Collect every video of a user and return them as a playlist.

          Returns a one-element list holding the playlist result, titled with
          the username.

          Raises ExtractorError when the URL is invalid or the user page does
          not contain a user id.
          """
          # Extract username
          mobj = re.match(self._VALID_URL, url)
          if mobj is None:
              raise ExtractorError(u'Invalid URL: %s' % url)

          username = mobj.group(1)

          page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'

          page = self._download_webpage(url, username, u'Downloading user page')
          users_id_mobj = re.search(r'data-users-id="([^"]+)"', page)
          if users_id_mobj is None:
              # Without the id the episode list cannot be queried at all.
              raise ExtractorError(u'Unable to extract user id for %s' % username)
          page_base = page_base % users_id_mobj.group(1)

          # Download video ids using BlipTV Ajax calls. Result size per
          # query is limited (currently to 12 videos) so we need to query
          # page by page until there are no video ids - it means we got
          # all of them.

          video_ids = []
          pagenum = 1

          while True:
              page_url = page_base + "&page=" + str(pagenum)
              page = self._download_webpage(page_url, username,
                                            u'Downloading video ids from page %d' % pagenum)

              # Extract video identifiers. The id is unescaped before the
              # duplicate check so both sides of the comparison are in the
              # same form.
              ids_in_page = []
              for href_mobj in re.finditer(r'href="/([^"]+)"', page):
                  video_id = unescapeHTML(href_mobj.group(1))
                  if video_id not in ids_in_page:
                      ids_in_page.append(video_id)

              video_ids.extend(ids_in_page)

              # A little optimization - if current page is not
              # "full", ie. does not contain PAGE_SIZE video ids then
              # we can assume that this page is the last one - there
              # are no more ids on further pages - no need to query
              # again.
              if len(ids_in_page) < self._PAGE_SIZE:
                  break

              pagenum += 1

          urls = [u'http://blip.tv/%s' % video_id for video_id in video_ids]
          url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
          return [self.playlist_result(url_entries, playlist_title=username)]