You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

133 lines
5.0 KiB

11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
  1. import re
  2. import json
  3. import xml.etree.ElementTree
  4. import datetime
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. compat_HTTPError,
  8. ExtractorError,
  9. )
  10. class VevoIE(InfoExtractor):
  11. """
  12. Accepts urls from vevo.com or in the format 'vevo:{id}'
  13. (currently used by MTVIE)
  14. """
  15. _VALID_URL = r'''(?x)
  16. (?:https?://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?|
  17. https?://cache\.vevo\.com/m/html/embed\.html\?video=|
  18. https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
  19. vevo:)
  20. (?P<id>[^&?#]+)'''
  21. _TESTS = [{
  22. u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
  23. u'file': u'GB1101300280.mp4',
  24. u"md5": u"06bea460acb744eab74a9d7dcb4bfd61",
  25. u'info_dict': {
  26. u"upload_date": u"20130624",
  27. u"uploader": u"Hurts",
  28. u"title": u"Somebody to Die For",
  29. u"duration": 230.12,
  30. u"width": 1920,
  31. u"height": 1080,
  32. }
  33. }]
  34. _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
  35. def _formats_from_json(self, video_info):
  36. last_version = {'version': -1}
  37. for version in video_info['videoVersions']:
  38. # These are the HTTP downloads, other types are for different manifests
  39. if version['sourceType'] == 2:
  40. if version['version'] > last_version['version']:
  41. last_version = version
  42. if last_version['version'] == -1:
  43. raise ExtractorError(u'Unable to extract last version of the video')
  44. renditions = xml.etree.ElementTree.fromstring(last_version['data'])
  45. formats = []
  46. # Already sorted from worst to best quality
  47. for rend in renditions.findall('rendition'):
  48. attr = rend.attrib
  49. format_note = '%(videoCodec)s@%(videoBitrate)4sk, %(audioCodec)s@%(audioBitrate)3sk' % attr
  50. formats.append({
  51. 'url': attr['url'],
  52. 'format_id': attr['name'],
  53. 'format_note': format_note,
  54. 'height': int(attr['frameheight']),
  55. 'width': int(attr['frameWidth']),
  56. })
  57. return formats
  58. def _formats_from_smil(self, smil_xml):
  59. formats = []
  60. smil_doc = xml.etree.ElementTree.fromstring(smil_xml.encode('utf-8'))
  61. els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')
  62. for el in els:
  63. src = el.attrib['src']
  64. m = re.match(r'''(?xi)
  65. (?P<ext>[a-z0-9]+):
  66. (?P<path>
  67. [/a-z0-9]+ # The directory and main part of the URL
  68. _(?P<cbr>[0-9]+)k
  69. _(?P<width>[0-9]+)x(?P<height>[0-9]+)
  70. _(?P<vcodec>[a-z0-9]+)
  71. _(?P<vbr>[0-9]+)
  72. _(?P<acodec>[a-z0-9]+)
  73. _(?P<abr>[0-9]+)
  74. \.[a-z0-9]+ # File extension
  75. )''', src)
  76. if not m:
  77. continue
  78. format_url = self._SMIL_BASE_URL + m.group('path')
  79. formats.append({
  80. 'url': format_url,
  81. 'format_id': u'SMIL_' + m.group('cbr'),
  82. 'vcodec': m.group('vcodec'),
  83. 'acodec': m.group('acodec'),
  84. 'vbr': int(m.group('vbr')),
  85. 'abr': int(m.group('abr')),
  86. 'ext': m.group('ext'),
  87. 'width': int(m.group('width')),
  88. 'height': int(m.group('height')),
  89. })
  90. return formats
  91. def _real_extract(self, url):
  92. mobj = re.match(self._VALID_URL, url)
  93. video_id = mobj.group('id')
  94. json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
  95. info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
  96. video_info = json.loads(info_json)['video']
  97. formats = self._formats_from_json(video_info)
  98. try:
  99. smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
  100. self._SMIL_BASE_URL, video_id, video_id.lower())
  101. smil_xml = self._download_webpage(smil_url, video_id,
  102. u'Downloading SMIL info')
  103. formats.extend(self._formats_from_smil(smil_xml))
  104. except ExtractorError as ee:
  105. if not isinstance(ee.cause, compat_HTTPError):
  106. raise
  107. self._downloader.report_warning(
  108. u'Cannot download SMIL information, falling back to JSON ..')
  109. timestamp_ms = int(self._search_regex(
  110. r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))
  111. upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)
  112. info = {
  113. 'id': video_id,
  114. 'title': video_info['title'],
  115. 'formats': formats,
  116. 'thumbnail': video_info['imageUrl'],
  117. 'upload_date': upload_date.strftime('%Y%m%d'),
  118. 'uploader': video_info['mainArtists'][0]['artistName'],
  119. 'duration': video_info['duration'],
  120. }
  121. return info