You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

80 lines
2.9 KiB

  1. import re
  2. import socket
  3. import xml.etree.ElementTree
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. compat_http_client,
  7. compat_str,
  8. compat_urllib_error,
  9. compat_urllib_request,
  10. ExtractorError,
  11. )
  12. class MTVIE(InfoExtractor):
  13. _VALID_URL = r'^(?P<proto>https?://)?(?:www\.)?mtv\.com/videos/[^/]+/(?P<videoid>[0-9]+)/[^/]+$'
  14. _WORKING = False
  15. def _real_extract(self, url):
  16. mobj = re.match(self._VALID_URL, url)
  17. if mobj is None:
  18. raise ExtractorError(u'Invalid URL: %s' % url)
  19. if not mobj.group('proto'):
  20. url = 'http://' + url
  21. video_id = mobj.group('videoid')
  22. webpage = self._download_webpage(url, video_id)
  23. # Some videos come from Vevo.com
  24. m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";',
  25. webpage, re.DOTALL)
  26. if m_vevo:
  27. vevo_id = m_vevo.group(1);
  28. self.to_screen(u'Vevo video detected: %s' % vevo_id)
  29. return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
  30. #song_name = self._html_search_regex(r'<meta name="mtv_vt" content="([^"]+)"/>',
  31. # webpage, u'song name', fatal=False)
  32. video_title = self._html_search_regex(r'<meta name="mtv_an" content="([^"]+)"/>',
  33. webpage, u'title')
  34. mtvn_uri = self._html_search_regex(r'<meta name="mtvn_uri" content="([^"]+)"/>',
  35. webpage, u'mtvn_uri', fatal=False)
  36. content_id = self._search_regex(r'MTVN.Player.defaultPlaylistId = ([0-9]+);',
  37. webpage, u'content id', fatal=False)
  38. videogen_url = 'http://www.mtv.com/player/includes/mediaGen.jhtml?uri=' + mtvn_uri + '&id=' + content_id + '&vid=' + video_id + '&ref=www.mtvn.com&viewUri=' + mtvn_uri
  39. self.report_extraction(video_id)
  40. request = compat_urllib_request.Request(videogen_url)
  41. try:
  42. metadataXml = compat_urllib_request.urlopen(request).read()
  43. except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
  44. raise ExtractorError(u'Unable to download video metadata: %s' % compat_str(err))
  45. mdoc = xml.etree.ElementTree.fromstring(metadataXml)
  46. renditions = mdoc.findall('.//rendition')
  47. # For now, always pick the highest quality.
  48. rendition = renditions[-1]
  49. try:
  50. _,_,ext = rendition.attrib['type'].partition('/')
  51. format = ext + '-' + rendition.attrib['width'] + 'x' + rendition.attrib['height'] + '_' + rendition.attrib['bitrate']
  52. video_url = rendition.find('./src').text
  53. except KeyError:
  54. raise ExtractorError('Invalid rendition field.')
  55. info = {
  56. 'id': video_id,
  57. 'url': video_url,
  58. 'upload_date': None,
  59. 'title': video_title,
  60. 'ext': ext,
  61. 'format': format,
  62. }
  63. return [info]