You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

99 lines
4.1 KiB

10 years ago
  1. from __future__ import unicode_literals
  2. from .common import InfoExtractor
  3. from ..compat import compat_str
  4. from ..utils import (
  5. ExtractorError,
  6. )
  7. class SubtitlesInfoExtractor(InfoExtractor):
  8. @property
  9. def _have_to_download_any_subtitles(self):
  10. return any([self._downloader.params.get('writesubtitles', False),
  11. self._downloader.params.get('writeautomaticsub')])
  12. def _list_available_subtitles(self, video_id, webpage):
  13. """ outputs the available subtitles for the video """
  14. sub_lang_list = self._get_available_subtitles(video_id, webpage)
  15. auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
  16. sub_lang = ",".join(list(sub_lang_list.keys()))
  17. self.to_screen('%s: Available subtitles for video: %s' %
  18. (video_id, sub_lang))
  19. auto_lang = ",".join(auto_captions_list.keys())
  20. self.to_screen('%s: Available automatic captions for video: %s' %
  21. (video_id, auto_lang))
  22. def extract_subtitles(self, video_id, webpage):
  23. """
  24. returns {sub_lang: sub} ,{} if subtitles not found or None if the
  25. subtitles aren't requested.
  26. """
  27. if not self._have_to_download_any_subtitles:
  28. return None
  29. available_subs_list = {}
  30. if self._downloader.params.get('writeautomaticsub', False):
  31. available_subs_list.update(self._get_available_automatic_caption(video_id, webpage))
  32. if self._downloader.params.get('writesubtitles', False):
  33. available_subs_list.update(self._get_available_subtitles(video_id, webpage))
  34. if not available_subs_list: # error, it didn't get the available subtitles
  35. return {}
  36. if self._downloader.params.get('allsubtitles', False):
  37. sub_lang_list = available_subs_list
  38. else:
  39. if self._downloader.params.get('subtitleslangs', False):
  40. requested_langs = self._downloader.params.get('subtitleslangs')
  41. elif 'en' in available_subs_list:
  42. requested_langs = ['en']
  43. else:
  44. requested_langs = [list(available_subs_list.keys())[0]]
  45. sub_lang_list = {}
  46. for sub_lang in requested_langs:
  47. if sub_lang not in available_subs_list:
  48. self._downloader.report_warning('no closed captions found in the specified language "%s"' % sub_lang)
  49. continue
  50. sub_lang_list[sub_lang] = available_subs_list[sub_lang]
  51. subtitles = {}
  52. for sub_lang, url in sub_lang_list.items():
  53. subtitle = self._request_subtitle_url(sub_lang, url)
  54. if subtitle:
  55. subtitles[sub_lang] = subtitle
  56. return subtitles
  57. def _download_subtitle_url(self, sub_lang, url):
  58. return self._download_webpage(url, None, note=False)
  59. def _request_subtitle_url(self, sub_lang, url):
  60. """ makes the http request for the subtitle """
  61. try:
  62. sub = self._download_subtitle_url(sub_lang, url)
  63. except ExtractorError as err:
  64. self._downloader.report_warning('unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
  65. return
  66. if not sub:
  67. self._downloader.report_warning('Did not fetch video subtitles')
  68. return
  69. return sub
  70. def _get_available_subtitles(self, video_id, webpage):
  71. """
  72. returns {sub_lang: url} or {} if not available
  73. Must be redefined by the subclasses
  74. """
  75. # By default, allow implementations to simply pass in the result
  76. assert isinstance(webpage, dict), \
  77. '_get_available_subtitles not implemented'
  78. return webpage
  79. def _get_available_automatic_caption(self, video_id, webpage):
  80. """
  81. returns {sub_lang: url} or {} if not available
  82. Must be redefined by the subclasses that support automatic captions,
  83. otherwise it will return {}
  84. """
  85. self._downloader.report_warning('Automatic Captions not supported by this server')
  86. return {}