You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

53 lines
1.9 KiB

import operator
import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    fix_xml_all_ampersand,
)
  7. class MetacriticIE(InfoExtractor):
  8. _VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)'
  9. _TEST = {
  10. u'url': u'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
  11. u'file': u'3698222.mp4',
  12. u'info_dict': {
  13. u'title': u'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
  14. u'description': u'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
  15. u'duration': 221,
  16. },
  17. }
  18. def _real_extract(self, url):
  19. mobj = re.match(self._VALID_URL, url)
  20. video_id = mobj.group('id')
  21. webpage = self._download_webpage(url, video_id)
  22. # The xml is not well formatted, there are raw '&'
  23. info = self._download_xml('http://www.metacritic.com/video_data?video=' + video_id,
  24. video_id, u'Downloading info xml', transform_source=fix_xml_all_ampersand)
  25. clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id)
  26. formats = []
  27. for videoFile in clip.findall('httpURI/videoFile'):
  28. rate_str = videoFile.find('rate').text
  29. video_url = videoFile.find('filePath').text
  30. formats.append({
  31. 'url': video_url,
  32. 'ext': 'mp4',
  33. 'format_id': rate_str,
  34. 'rate': int(rate_str),
  35. })
  36. formats.sort(key=operator.itemgetter('rate'))
  37. description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
  38. webpage, u'description', flags=re.DOTALL)
  39. return {
  40. 'id': video_id,
  41. 'title': clip.find('title').text,
  42. 'formats': formats,
  43. 'description': description,
  44. 'duration': int(clip.find('duration').text),
  45. }