You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

53 lines
1.9 KiB

11 years ago
11 years ago
  1. import re
  2. import json
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. ExtractorError,
  6. )
  7. class VevoIE(InfoExtractor):
  8. """
  9. Accecps urls from vevo.com or in the format 'vevo:{id}'
  10. (currently used by MTVIE)
  11. """
  12. _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*)$'
  13. _TEST = {
  14. u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
  15. u'file': u'GB1101300280.mp4',
  16. u'md5': u'06bea460acb744eab74a9d7dcb4bfd61',
  17. u'info_dict': {
  18. u"upload_date": u"20130624",
  19. u"uploader": u"Hurts",
  20. u"title": u"Somebody To Die For"
  21. }
  22. }
  23. def _real_extract(self, url):
  24. mobj = re.match(self._VALID_URL, url)
  25. video_id = mobj.group('id')
  26. json_url = 'http://www.vevo.com/data/video/%s' % video_id
  27. base_url = 'http://smil.lvl3.vevo.com'
  28. videos_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (base_url, video_id, video_id.lower())
  29. info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
  30. links_webpage = self._download_webpage(videos_url, video_id, u'Downloading videos urls')
  31. self.report_extraction(video_id)
  32. video_info = json.loads(info_json)
  33. m_urls = list(re.finditer(r'<video src="(?P<ext>.*?):(?P<url>.*?)"', links_webpage))
  34. if m_urls is None or len(m_urls) == 0:
  35. raise ExtractorError(u'Unable to extract video url')
  36. # They are sorted from worst to best quality
  37. m_url = m_urls[-1]
  38. video_url = base_url + m_url.group('url')
  39. ext = m_url.group('ext')
  40. return {'url': video_url,
  41. 'ext': ext,
  42. 'id': video_id,
  43. 'title': video_info['title'],
  44. 'thumbnail': video_info['img'],
  45. 'upload_date': video_info['launchDate'].replace('/',''),
  46. 'uploader': video_info['Artists'][0]['title'],
  47. }