You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

73 lines
2.6 KiB

  1. import re
  2. import socket
  3. import xml.etree.ElementTree
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. compat_http_client,
  7. compat_str,
  8. compat_urllib_error,
  9. compat_urllib_request,
  10. ExtractorError,
  11. )
  12. class MTVIE(InfoExtractor):
  13. _VALID_URL = r'^(?P<proto>https?://)?(?:www\.)?mtv\.com/videos/[^/]+/(?P<videoid>[0-9]+)/[^/]+$'
  14. def _real_extract(self, url):
  15. mobj = re.match(self._VALID_URL, url)
  16. if mobj is None:
  17. raise ExtractorError(u'Invalid URL: %s' % url)
  18. if not mobj.group('proto'):
  19. url = 'http://' + url
  20. video_id = mobj.group('videoid')
  21. webpage = self._download_webpage(url, video_id)
  22. song_name = self._html_search_regex(r'<meta name="mtv_vt" content="([^"]+)"/>',
  23. webpage, u'song name', fatal=False)
  24. video_title = self._html_search_regex(r'<meta name="mtv_an" content="([^"]+)"/>',
  25. webpage, u'title')
  26. mtvn_uri = self._html_search_regex(r'<meta name="mtvn_uri" content="([^"]+)"/>',
  27. webpage, u'mtvn_uri', fatal=False)
  28. content_id = self._search_regex(r'MTVN.Player.defaultPlaylistId = ([0-9]+);',
  29. webpage, u'content id', fatal=False)
  30. videogen_url = 'http://www.mtv.com/player/includes/mediaGen.jhtml?uri=' + mtvn_uri + '&id=' + content_id + '&vid=' + video_id + '&ref=www.mtvn.com&viewUri=' + mtvn_uri
  31. self.report_extraction(video_id)
  32. request = compat_urllib_request.Request(videogen_url)
  33. try:
  34. metadataXml = compat_urllib_request.urlopen(request).read()
  35. except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
  36. raise ExtractorError(u'Unable to download video metadata: %s' % compat_str(err))
  37. mdoc = xml.etree.ElementTree.fromstring(metadataXml)
  38. print(metadataXml)
  39. renditions = mdoc.findall('.//rendition')
  40. # For now, always pick the highest quality.
  41. rendition = renditions[-1]
  42. try:
  43. _,_,ext = rendition.attrib['type'].partition('/')
  44. format = ext + '-' + rendition.attrib['width'] + 'x' + rendition.attrib['height'] + '_' + rendition.attrib['bitrate']
  45. video_url = rendition.find('./src').text
  46. except KeyError:
  47. raise ExtractorError('Invalid rendition field.')
  48. info = {
  49. 'id': video_id,
  50. 'url': video_url,
  51. 'uploader': performer,
  52. 'upload_date': None,
  53. 'title': video_title,
  54. 'ext': ext,
  55. 'format': format,
  56. }
  57. return [info]