You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

85 lines
3.3 KiB

11 years ago
11 years ago
11 years ago
  1. import re
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. ExtractorError,
  5. unescapeHTML,
  6. )
  class SteamIE(InfoExtractor):
      """Extractor for the trailers listed on a Steam store game/video page.

      Given a store URL, it loads the game's video page (passing the age gate
      when one is shown) and returns every trailer found there as a playlist.
      """

      # Verbose pattern: it contains layout whitespace and inline comments, so
      # it must always be matched with the re.VERBOSE flag.
      # Both http and https store URLs are accepted.
      _VALID_URL = r"""https?://store\.steampowered\.com/
                  (agecheck/)?
                  (?P<urltype>video|app)/ #If the page is only for videos or for a game
                  (?P<gameID>\d+)/?
                  (?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID
                  """
      # Page that lists the videos of a game; filled in with the game id.
      _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
      # Same page requested with a fixed birth date in the query string, used
      # when the plain page answers with the age-confirmation form.
      _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
      _TEST = {
          u"url": u"http://store.steampowered.com/video/105600/",
          u"playlist": [
              {
                  u"file": u"81300.flv",
                  u"md5": u"f870007cee7065d7c76b88f0a45ecc07",
                  u"info_dict": {
                      u"title": u"Terraria 1.1 Trailer",
                      u'playlist_index': 1,
                  }
              },
              {
                  u"file": u"80859.flv",
                  u"md5": u"61aaf31a5c5c3041afb58fb83cbb5751",
                  u"info_dict": {
                      u"title": u"Terraria Trailer",
                      u'playlist_index': 2,
                  }
              }
          ]
      }

      @classmethod
      def suitable(cls, url):
          """Receives a URL and returns True if suitable for this IE."""
          # Matched here explicitly because _VALID_URL only works with
          # re.VERBOSE (presumably the inherited check does not pass that
          # flag -- the base class is not visible from this file).
          return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

      def _real_extract(self, url):
          """Return the trailers of the game referenced by ``url``.

          The result is a one-element list holding the playlist built by
          ``self.playlist_result`` from the collected video entries, the game
          id and the game title.

          Raises ExtractorError if ``url`` does not match ``_VALID_URL``.
          """
          url_match = re.match(self._VALID_URL, url, re.VERBOSE)
          if url_match is None:
              # Fail with a clear extractor error rather than an
              # AttributeError on None.
              raise ExtractorError(u'Invalid URL: %s' % url)
          game_id = url_match.group('gameID')

          page_url = self._VIDEO_PAGE_TEMPLATE % game_id
          webpage = self._download_webpage(page_url, game_id)

          # The marker is a fixed string, so a substring test is enough.
          if '<h2>Please enter your birth date to continue:</h2>' in webpage:
              # Age gate shown: fetch the page again through the age-check URL.
              page_url = self._AGECHECK_TEMPLATE % game_id
              self.report_age_confirmation()
              webpage = self._download_webpage(page_url, game_id)

          self.report_extraction(game_id)
          game_title = self._html_search_regex(
              r'<h2 class="pageheader">(.*?)</h2>', webpage, 'game title')

          # Three independent scans of the page: movie id + file URL, the
          # displayed title, and the thumbnail image.
          movie_re = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},"
          title_re = r'<span class="title">(?P<videoName>.+?)</span>'
          thumb_re = r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">'

          # The matches are paired by position; zip() stops at the shortest
          # sequence, so surplus matches of any one kind are ignored.
          # A matched videoURL is never empty (the group requires at least one
          # character), so no per-entry URL check is needed.
          videos = [
              {
                  'id': movie.group('videoID'),
                  'url': movie.group('videoURL'),
                  'ext': 'flv',
                  'title': unescapeHTML(title.group('videoName')),
                  'thumbnail': thumb.group('thumbnail'),
              }
              for movie, title, thumb in zip(
                  re.finditer(movie_re, webpage),
                  re.finditer(title_re, webpage),
                  re.finditer(thumb_re, webpage))
          ]
          return [self.playlist_result(videos, game_id, game_title)]