You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

72 lines
2.5 KiB

  1. import re
  2. import socket
  3. import xml.etree.ElementTree
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. compat_http_client,
  7. compat_str,
  8. compat_urllib_error,
  9. compat_urllib_request,
  10. ExtractorError,
  11. )
  12. class MTVIE(InfoExtractor):
  13. _VALID_URL = r'^(?P<proto>https?://)?(?:www\.)?mtv\.com/videos/[^/]+/(?P<videoid>[0-9]+)/[^/]+$'
  14. _WORKING = False
  15. def _real_extract(self, url):
  16. mobj = re.match(self._VALID_URL, url)
  17. if mobj is None:
  18. raise ExtractorError(u'Invalid URL: %s' % url)
  19. if not mobj.group('proto'):
  20. url = 'http://' + url
  21. video_id = mobj.group('videoid')
  22. webpage = self._download_webpage(url, video_id)
  23. #song_name = self._html_search_regex(r'<meta name="mtv_vt" content="([^"]+)"/>',
  24. # webpage, u'song name', fatal=False)
  25. video_title = self._html_search_regex(r'<meta name="mtv_an" content="([^"]+)"/>',
  26. webpage, u'title')
  27. mtvn_uri = self._html_search_regex(r'<meta name="mtvn_uri" content="([^"]+)"/>',
  28. webpage, u'mtvn_uri', fatal=False)
  29. content_id = self._search_regex(r'MTVN.Player.defaultPlaylistId = ([0-9]+);',
  30. webpage, u'content id', fatal=False)
  31. videogen_url = 'http://www.mtv.com/player/includes/mediaGen.jhtml?uri=' + mtvn_uri + '&id=' + content_id + '&vid=' + video_id + '&ref=www.mtvn.com&viewUri=' + mtvn_uri
  32. self.report_extraction(video_id)
  33. request = compat_urllib_request.Request(videogen_url)
  34. try:
  35. metadataXml = compat_urllib_request.urlopen(request).read()
  36. except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
  37. raise ExtractorError(u'Unable to download video metadata: %s' % compat_str(err))
  38. mdoc = xml.etree.ElementTree.fromstring(metadataXml)
  39. renditions = mdoc.findall('.//rendition')
  40. # For now, always pick the highest quality.
  41. rendition = renditions[-1]
  42. try:
  43. _,_,ext = rendition.attrib['type'].partition('/')
  44. format = ext + '-' + rendition.attrib['width'] + 'x' + rendition.attrib['height'] + '_' + rendition.attrib['bitrate']
  45. video_url = rendition.find('./src').text
  46. except KeyError:
  47. raise ExtractorError('Invalid rendition field.')
  48. info = {
  49. 'id': video_id,
  50. 'url': video_url,
  51. 'upload_date': None,
  52. 'title': video_title,
  53. 'ext': ext,
  54. 'format': format,
  55. }
  56. return [info]