You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

229 lines
8.2 KiB

11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
  1. from __future__ import unicode_literals
  2. import re
  3. import xml.etree.ElementTree
  4. from .common import InfoExtractor
  5. from ..compat import (
  6. compat_urllib_request,
  7. )
  8. from ..utils import (
  9. ExtractorError,
  10. int_or_none,
  11. )
  12. class VevoIE(InfoExtractor):
  13. """
  14. Accepts urls from vevo.com or in the format 'vevo:{id}'
  15. (currently used by MTVIE and MySpaceIE)
  16. """
  17. _VALID_URL = r'''(?x)
  18. (?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?|
  19. https?://cache\.vevo\.com/m/html/embed\.html\?video=|
  20. https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
  21. vevo:)
  22. (?P<id>[^&?#]+)'''
  23. _TESTS = [{
  24. 'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
  25. "md5": "95ee28ee45e70130e3ab02b0f579ae23",
  26. 'info_dict': {
  27. 'id': 'GB1101300280',
  28. 'ext': 'mp4',
  29. "upload_date": "20130624",
  30. "uploader": "Hurts",
  31. "title": "Somebody to Die For",
  32. "duration": 230.12,
  33. "width": 1920,
  34. "height": 1080,
  35. # timestamp and upload_date are often incorrect; seem to change randomly
  36. 'timestamp': int,
  37. }
  38. }, {
  39. 'note': 'v3 SMIL format',
  40. 'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
  41. 'md5': 'f6ab09b034f8c22969020b042e5ac7fc',
  42. 'info_dict': {
  43. 'id': 'USUV71302923',
  44. 'ext': 'mp4',
  45. 'upload_date': '20140219',
  46. 'uploader': 'Cassadee Pope',
  47. 'title': 'I Wish I Could Break Your Heart',
  48. 'duration': 226.101,
  49. 'age_limit': 0,
  50. 'timestamp': int,
  51. }
  52. }, {
  53. 'note': 'Age-limited video',
  54. 'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
  55. 'info_dict': {
  56. 'id': 'USRV81300282',
  57. 'ext': 'mp4',
  58. 'age_limit': 18,
  59. 'title': 'Tunnel Vision (Explicit)',
  60. 'uploader': 'Justin Timberlake',
  61. 'upload_date': 're:2013070[34]',
  62. 'timestamp': int,
  63. },
  64. 'params': {
  65. 'skip_download': 'true',
  66. }
  67. }]
  68. _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
  69. def _real_initialize(self):
  70. req = compat_urllib_request.Request(
  71. 'http://www.vevo.com/auth', data=b'')
  72. webpage = self._download_webpage(
  73. req, None,
  74. note='Retrieving oauth token',
  75. errnote='Unable to retrieve oauth token',
  76. fatal=False)
  77. if webpage is False:
  78. self._oauth_token = None
  79. else:
  80. self._oauth_token = self._search_regex(
  81. r'access_token":\s*"([^"]+)"',
  82. webpage, 'access token', fatal=False)
  83. def _formats_from_json(self, video_info):
  84. last_version = {'version': -1}
  85. for version in video_info['videoVersions']:
  86. # These are the HTTP downloads, other types are for different manifests
  87. if version['sourceType'] == 2:
  88. if version['version'] > last_version['version']:
  89. last_version = version
  90. if last_version['version'] == -1:
  91. raise ExtractorError('Unable to extract last version of the video')
  92. renditions = xml.etree.ElementTree.fromstring(last_version['data'])
  93. formats = []
  94. # Already sorted from worst to best quality
  95. for rend in renditions.findall('rendition'):
  96. attr = rend.attrib
  97. format_note = '%(videoCodec)s@%(videoBitrate)4sk, %(audioCodec)s@%(audioBitrate)3sk' % attr
  98. formats.append({
  99. 'url': attr['url'],
  100. 'format_id': attr['name'],
  101. 'format_note': format_note,
  102. 'height': int(attr['frameheight']),
  103. 'width': int(attr['frameWidth']),
  104. })
  105. return formats
  106. def _formats_from_smil(self, smil_xml):
  107. formats = []
  108. smil_doc = xml.etree.ElementTree.fromstring(smil_xml.encode('utf-8'))
  109. els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')
  110. for el in els:
  111. src = el.attrib['src']
  112. m = re.match(r'''(?xi)
  113. (?P<ext>[a-z0-9]+):
  114. (?P<path>
  115. [/a-z0-9]+ # The directory and main part of the URL
  116. _(?P<cbr>[0-9]+)k
  117. _(?P<width>[0-9]+)x(?P<height>[0-9]+)
  118. _(?P<vcodec>[a-z0-9]+)
  119. _(?P<vbr>[0-9]+)
  120. _(?P<acodec>[a-z0-9]+)
  121. _(?P<abr>[0-9]+)
  122. \.[a-z0-9]+ # File extension
  123. )''', src)
  124. if not m:
  125. continue
  126. format_url = self._SMIL_BASE_URL + m.group('path')
  127. formats.append({
  128. 'url': format_url,
  129. 'format_id': 'SMIL_' + m.group('cbr'),
  130. 'vcodec': m.group('vcodec'),
  131. 'acodec': m.group('acodec'),
  132. 'vbr': int(m.group('vbr')),
  133. 'abr': int(m.group('abr')),
  134. 'ext': m.group('ext'),
  135. 'width': int(m.group('width')),
  136. 'height': int(m.group('height')),
  137. })
  138. return formats
  139. def _download_api_formats(self, video_id):
  140. if not self._oauth_token:
  141. self._downloader.report_warning(
  142. 'No oauth token available, skipping API HLS download')
  143. return []
  144. api_url = 'https://apiv2.vevo.com/video/%s/streams/hls?token=%s' % (
  145. video_id, self._oauth_token)
  146. api_data = self._download_json(
  147. api_url, video_id,
  148. note='Downloading HLS formats',
  149. errnote='Failed to download HLS format list', fatal=False)
  150. if api_data is None:
  151. return []
  152. m3u8_url = api_data[0]['url']
  153. return self._extract_m3u8_formats(
  154. m3u8_url, video_id, entry_protocol='m3u8_native', ext='mp4',
  155. preference=0)
  156. def _real_extract(self, url):
  157. mobj = re.match(self._VALID_URL, url)
  158. video_id = mobj.group('id')
  159. json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
  160. response = self._download_json(json_url, video_id)
  161. video_info = response['video']
  162. if not video_info:
  163. if 'statusMessage' in response:
  164. raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusMessage']), expected=True)
  165. raise ExtractorError('Unable to extract videos')
  166. formats = self._formats_from_json(video_info)
  167. is_explicit = video_info.get('isExplicit')
  168. if is_explicit is True:
  169. age_limit = 18
  170. elif is_explicit is False:
  171. age_limit = 0
  172. else:
  173. age_limit = None
  174. # Download via HLS API
  175. formats.extend(self._download_api_formats(video_id))
  176. # Download SMIL
  177. smil_blocks = sorted((
  178. f for f in video_info['videoVersions']
  179. if f['sourceType'] == 13),
  180. key=lambda f: f['version'])
  181. smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
  182. self._SMIL_BASE_URL, video_id, video_id.lower())
  183. if smil_blocks:
  184. smil_url_m = self._search_regex(
  185. r'url="([^"]+)"', smil_blocks[-1]['data'], 'SMIL URL',
  186. default=None)
  187. if smil_url_m is not None:
  188. smil_url = smil_url_m
  189. if smil_url:
  190. smil_xml = self._download_webpage(
  191. smil_url, video_id, 'Downloading SMIL info', fatal=False)
  192. if smil_xml:
  193. formats.extend(self._formats_from_smil(smil_xml))
  194. self._sort_formats(formats)
  195. timestamp_ms = int_or_none(self._search_regex(
  196. r'/Date\((\d+)\)/',
  197. video_info['launchDate'], 'launch date', fatal=False))
  198. return {
  199. 'id': video_id,
  200. 'title': video_info['title'],
  201. 'formats': formats,
  202. 'thumbnail': video_info['imageUrl'],
  203. 'timestamp': timestamp_ms // 1000,
  204. 'uploader': video_info['mainArtists'][0]['artistName'],
  205. 'duration': video_info['duration'],
  206. 'age_limit': age_limit,
  207. }