You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

101 lines
3.3 KiB

  1. import re
  2. from ..utils import (
  3. ExtractorError,
  4. unescapeHTML,
  5. unified_strdate,
  6. )
  7. from .subtitles import SubtitlesInfoExtractor
  8. class VikiIE(SubtitlesInfoExtractor):
  9. IE_NAME = u'viki'
  10. _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
  11. _TEST = {
  12. u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14',
  13. u'file': u'1023585v.mp4',
  14. u'md5': u'a21454021c2646f5433514177e2caa5f',
  15. u'info_dict': {
  16. u'title': u'Heirs Episode 14',
  17. u'uploader': u'SBS',
  18. u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
  19. u'upload_date': u'20131121',
  20. u'age_limit': 13,
  21. },
  22. u'skip': u'Blocked in the US',
  23. }
  24. def _real_extract(self, url):
  25. mobj = re.match(self._VALID_URL, url)
  26. video_id = mobj.group(1)
  27. webpage = self._download_webpage(url, video_id)
  28. title = self._og_search_title(webpage)
  29. description = self._og_search_description(webpage)
  30. thumbnail = self._og_search_thumbnail(webpage)
  31. uploader_m = re.search(
  32. r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage)
  33. if uploader_m is None:
  34. uploader = None
  35. else:
  36. uploader = uploader_m.group(1).strip()
  37. rating_str = self._html_search_regex(
  38. r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
  39. u'rating information', default='').strip()
  40. RATINGS = {
  41. 'G': 0,
  42. 'PG': 10,
  43. 'PG-13': 13,
  44. 'R': 16,
  45. 'NC': 18,
  46. }
  47. age_limit = RATINGS.get(rating_str)
  48. info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
  49. info_webpage = self._download_webpage(
  50. info_url, video_id, note=u'Downloading info page')
  51. if re.match(r'\s*<div\s+class="video-error', info_webpage):
  52. raise ExtractorError(
  53. u'Video %s is blocked from your location.' % video_id,
  54. expected=True)
  55. video_url = self._html_search_regex(
  56. r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')
  57. upload_date_str = self._html_search_regex(
  58. r'"created_at":"([^"]+)"', info_webpage, u'upload date')
  59. upload_date = (
  60. unified_strdate(upload_date_str)
  61. if upload_date_str is not None
  62. else None
  63. )
  64. # subtitles
  65. video_subtitles = self.extract_subtitles(video_id, info_webpage)
  66. if self._downloader.params.get('listsubtitles', False):
  67. self._list_available_subtitles(video_id, info_webpage)
  68. return
  69. return {
  70. 'id': video_id,
  71. 'title': title,
  72. 'url': video_url,
  73. 'description': description,
  74. 'thumbnail': thumbnail,
  75. 'age_limit': age_limit,
  76. 'uploader': uploader,
  77. 'subtitles': video_subtitles,
  78. 'upload_date': upload_date,
  79. }
  80. def _get_available_subtitles(self, video_id, info_webpage):
  81. res = {}
  82. for sturl_html in re.findall(r'<track src="([^"]+)"/>', info_webpage):
  83. sturl = unescapeHTML(sturl_html)
  84. m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
  85. if not m:
  86. continue
  87. res[m.group('lang')] = sturl
  88. return res