You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

77 lines
2.6 KiB

12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
12 years ago
  1. import re
  2. import json
  3. import xml.etree.ElementTree
  4. import datetime
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. determine_ext,
  8. ExtractorError,
  9. )
  10. class VevoIE(InfoExtractor):
  11. """
  12. Accepts urls from vevo.com or in the format 'vevo:{id}'
  13. (currently used by MTVIE)
  14. """
  15. _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
  16. _TEST = {
  17. u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
  18. u'file': u'GB1101300280.mp4',
  19. u'info_dict': {
  20. u"upload_date": u"20130624",
  21. u"uploader": u"Hurts",
  22. u"title": u"Somebody to Die For",
  23. u'duration': 230,
  24. }
  25. }
  26. def _real_extract(self, url):
  27. mobj = re.match(self._VALID_URL, url)
  28. video_id = mobj.group('id')
  29. json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
  30. info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
  31. self.report_extraction(video_id)
  32. video_info = json.loads(info_json)['video']
  33. last_version = {'version': -1}
  34. for version in video_info['videoVersions']:
  35. # These are the HTTP downloads, other types are for different manifests
  36. if version['sourceType'] == 2:
  37. if version['version'] > last_version['version']:
  38. last_version = version
  39. if last_version['version'] == -1:
  40. raise ExtractorError(u'Unable to extract last version of the video')
  41. renditions = xml.etree.ElementTree.fromstring(last_version['data'])
  42. formats = []
  43. # Already sorted from worst to best quality
  44. for rend in renditions.findall('rendition'):
  45. attr = rend.attrib
  46. f_url = attr['url']
  47. formats.append({
  48. 'url': f_url,
  49. 'ext': determine_ext(f_url),
  50. 'height': int(attr['frameheight']),
  51. 'width': int(attr['frameWidth']),
  52. })
  53. date_epoch = int(self._search_regex(
  54. r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))/1000
  55. upload_date = datetime.datetime.fromtimestamp(date_epoch)
  56. info = {
  57. 'id': video_id,
  58. 'title': video_info['title'],
  59. 'formats': formats,
  60. 'thumbnail': video_info['imageUrl'],
  61. 'upload_date': upload_date.strftime('%Y%m%d'),
  62. 'uploader': video_info['mainArtists'][0]['artistName'],
  63. 'duration': video_info['duration'],
  64. }
  65. # TODO: Remove when #980 has been merged
  66. info.update(formats[-1])
  67. return info