You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

55 lines
2.1 KiB

  1. import re
  2. import xml.etree.ElementTree
  3. import operator
  4. from .common import InfoExtractor
  5. class MetacriticIE(InfoExtractor):
  6. _VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)'
  7. _TEST = {
  8. u'url': u'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
  9. u'file': u'3698222.mp4',
  10. u'info_dict': {
  11. u'title': u'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
  12. u'description': u'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
  13. u'duration': 221,
  14. },
  15. }
  16. def _real_extract(self, url):
  17. mobj = re.match(self._VALID_URL, url)
  18. video_id = mobj.group('id')
  19. webpage = self._download_webpage(url, video_id)
  20. # The xml is not well formatted, there are raw '&'
  21. info_xml = self._download_webpage('http://www.metacritic.com/video_data?video=' + video_id,
  22. video_id, u'Downloading info xml').replace('&', '&amp;')
  23. info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
  24. clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id)
  25. formats = []
  26. for videoFile in clip.findall('httpURI/videoFile'):
  27. rate_str = videoFile.find('rate').text
  28. video_url = videoFile.find('filePath').text
  29. formats.append({
  30. 'url': video_url,
  31. 'ext': 'mp4',
  32. 'format_id': rate_str,
  33. 'rate': int(rate_str),
  34. })
  35. formats.sort(key=operator.itemgetter('rate'))
  36. description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
  37. webpage, u'description', flags=re.DOTALL)
  38. info = {
  39. 'id': video_id,
  40. 'title': clip.find('title').text,
  41. 'formats': formats,
  42. 'description': description,
  43. 'duration': int(clip.find('duration').text),
  44. }
  45. # TODO: Remove when #980 has been merged
  46. info.update(formats[-1])
  47. return info