You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

345 lines
13 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
7 years ago
7 years ago
  1. #!/usr/bin/env python
  2. from __future__ import unicode_literals
  3. # Allow direct execution
  4. import os
  5. import sys
  6. import unittest
  7. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  8. from test.helper import FakeYDL, md5
  9. from youtube_dl.extractor import (
  10. YoutubeIE,
  11. DailymotionIE,
  12. TEDIE,
  13. VimeoIE,
  14. WallaIE,
  15. CeskaTelevizeIE,
  16. LyndaIE,
  17. NPOIE,
  18. ComedyCentralIE,
  19. NRKTVIE,
  20. RaiPlayIE,
  21. VikiIE,
  22. ThePlatformIE,
  23. ThePlatformFeedIE,
  24. RTVEALaCartaIE,
  25. DemocracynowIE,
  26. )
  class BaseTestSubtitles(unittest.TestCase):
      """Shared fixture for the per-extractor subtitle tests.

      Subclasses set ``url`` and ``IE``; ``setUp`` then creates a ``FakeYDL``
      and registers one instance of that extractor with it.
      """

      # URL (or video ID) handed to extract_info; set by subclasses and, in
      # some tests, reassigned on the instance before extraction.
      url = None
      # Extractor class instantiated in setUp; set by subclasses.
      IE = None

      def setUp(self):
          self.DL = FakeYDL()
          self.ie = self.IE()
          self.DL.add_info_extractor(self.ie)

      def getInfoDict(self):
          """Return the info dict extracted for ``self.url`` without downloading."""
          return self.DL.extract_info(self.url, download=False)

      def getSubtitles(self):
          """Return the requested subtitles as a ``{key: subtitle text}`` dict.

          Keys are those of ``info_dict['requested_subtitles']``. If that
          value is falsy it is returned unchanged. Entries whose ``'data'``
          is missing are fetched from their ``'url'`` and decoded as UTF-8;
          the fetched text is also stored back on the entry.
          """
          info_dict = self.getInfoDict()
          subtitles = info_dict['requested_subtitles']
          if not subtitles:
              return subtitles
          for sub_info in subtitles.values():
              if sub_info.get('data') is None:
                  uf = self.DL.urlopen(sub_info['url'])
                  # Close the response even if reading or decoding fails, so
                  # the test run does not leak open connections.
                  try:
                      sub_info['data'] = uf.read().decode('utf-8')
                  finally:
                      uf.close()
          return dict((lang, sub_info['data']) for lang, sub_info in subtitles.items())
  class TestYoutubeSubtitles(BaseTestSubtitles):
      """Subtitle extraction checks for YoutubeIE."""

      url = 'QRS8MkLhQmM'
      IE = YoutubeIE

      def test_youtube_allsubtitles(self):
          # Request every track; pin the track count and two checksums, and
          # make sure the 'fr' and 'de' tracks are present.
          params = self.DL.params
          params['writesubtitles'] = True
          params['allsubtitles'] = True
          subtitles = self.getSubtitles()
          languages = subtitles.keys()
          self.assertEqual(len(languages), 13)
          self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
          self.assertEqual(md5(subtitles['it']), '6d752b98c31f1cf8d597050c7a2cb4b5')
          for lang in ('fr', 'de'):
              self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)

      def test_youtube_subtitles_ttml_format(self):
          # Same video, subtitles requested in 'ttml' format.
          params = self.DL.params
          params['writesubtitles'] = True
          params['subtitlesformat'] = 'ttml'
          subtitles = self.getSubtitles()
          self.assertEqual(md5(subtitles['en']), 'e306f8c42842f723447d9f63ad65df54')

      def test_youtube_subtitles_vtt_format(self):
          # Same video, subtitles requested in 'vtt' format.
          params = self.DL.params
          params['writesubtitles'] = True
          params['subtitlesformat'] = 'vtt'
          subtitles = self.getSubtitles()
          self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')

      def test_youtube_automatic_captions(self):
          # Automatic captions, restricted to the 'it' language.
          self.url = '8YoUxe5ncPo'
          params = self.DL.params
          params['writeautomaticsub'] = True
          params['subtitleslangs'] = ['it']
          subtitles = self.getSubtitles()
          self.assertTrue(subtitles['it'] is not None)

      def test_youtube_translated_subtitles(self):
          # This video has a subtitles track, which can be translated
          self.url = 'Ky9eprVWzlI'
          params = self.DL.params
          params['writeautomaticsub'] = True
          params['subtitleslangs'] = ['it']
          subtitles = self.getSubtitles()
          self.assertTrue(subtitles['it'] is not None)

      def test_youtube_nosubtitles(self):
          # A missing-subtitles warning is expected and the result is falsy.
          self.DL.expect_warning('video doesn\'t have subtitles')
          self.url = 'n5BB19UTcdA'
          params = self.DL.params
          params['writesubtitles'] = True
          params['allsubtitles'] = True
          subtitles = self.getSubtitles()
          self.assertFalse(subtitles)
  class TestDailymotionSubtitles(BaseTestSubtitles):
      """Subtitle extraction checks for DailymotionIE."""

      url = 'http://www.dailymotion.com/video/xczg00'
      IE = DailymotionIE

      def test_allsubtitles(self):
          # At least six tracks are expected; 'en' and 'fr' are pinned by
          # checksum, and 'es', 'fr' and 'de' must all be present.
          params = self.DL.params
          params['writesubtitles'] = True
          params['allsubtitles'] = True
          subtitles = self.getSubtitles()
          languages = subtitles.keys()
          self.assertTrue(len(languages) >= 6)
          self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
          self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
          for lang in ('es', 'fr', 'de'):
              self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)

      def test_nosubtitles(self):
          # A missing-subtitles warning is expected and the result is falsy.
          self.DL.expect_warning('video doesn\'t have subtitles')
          self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
          params = self.DL.params
          params['writesubtitles'] = True
          params['allsubtitles'] = True
          subtitles = self.getSubtitles()
          self.assertFalse(subtitles)
  class TestTedSubtitles(BaseTestSubtitles):
      """Subtitle extraction checks for TEDIE."""

      url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
      IE = TEDIE

      def test_allsubtitles(self):
          # At least 28 tracks are expected; 'en' and 'fr' are pinned by
          # checksum, and 'es', 'fr' and 'de' must all be present.
          params = self.DL.params
          params['writesubtitles'] = True
          params['allsubtitles'] = True
          subtitles = self.getSubtitles()
          languages = subtitles.keys()
          self.assertTrue(len(languages) >= 28)
          self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
          self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
          for lang in ('es', 'fr', 'de'):
              self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
  class TestVimeoSubtitles(BaseTestSubtitles):
      """Subtitle extraction checks for VimeoIE."""

      url = 'http://vimeo.com/76979871'
      IE = VimeoIE

      def test_allsubtitles(self):
          # Exactly the 'de', 'en', 'es' and 'fr' tracks are expected.
          params = self.DL.params
          params['writesubtitles'] = True
          params['allsubtitles'] = True
          subtitles = self.getSubtitles()
          languages = set(subtitles.keys())
          self.assertEqual(languages, set(['de', 'en', 'es', 'fr']))
          self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
          self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')

      def test_nosubtitles(self):
          # A missing-subtitles warning is expected and the result is falsy.
          self.DL.expect_warning('video doesn\'t have subtitles')
          self.url = 'http://vimeo.com/56015672'
          params = self.DL.params
          params['writesubtitles'] = True
          params['allsubtitles'] = True
          subtitles = self.getSubtitles()
          self.assertFalse(subtitles)
  class TestWallaSubtitles(BaseTestSubtitles):
      """Subtitle extraction checks for WallaIE."""

      url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
      IE = WallaIE

      def test_allsubtitles(self):
          # Only a 'heb' track is expected; the automatic-captions warning
          # is registered as expected before extraction.
          self.DL.expect_warning('Automatic Captions not supported by this server')
          params = self.DL.params
          params['writesubtitles'] = True
          params['allsubtitles'] = True
          subtitles = self.getSubtitles()
          languages = set(subtitles.keys())
          self.assertEqual(languages, set(['heb']))
          self.assertEqual(md5(subtitles['heb']), 'e758c5d7cb982f6bef14f377ec7a3920')

      def test_nosubtitles(self):
          # A missing-subtitles warning is expected and the result is falsy.
          self.DL.expect_warning('video doesn\'t have subtitles')
          self.url = 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one'
          params = self.DL.params
          params['writesubtitles'] = True
          params['allsubtitles'] = True
          subtitles = self.getSubtitles()
          self.assertFalse(subtitles)
  class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
      """Subtitle extraction checks for CeskaTelevizeIE."""

      url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
      IE = CeskaTelevizeIE

      def test_allsubtitles(self):
          # Only a 'cs' track is expected; its content is checked by length
          # rather than by checksum.
          self.DL.expect_warning('Automatic Captions not supported by this server')
          params = self.DL.params
          params['writesubtitles'] = True
          params['allsubtitles'] = True
          subtitles = self.getSubtitles()
          languages = set(subtitles.keys())
          self.assertEqual(languages, set(['cs']))
          self.assertTrue(len(subtitles['cs']) > 20000)

      def test_nosubtitles(self):
          # A missing-subtitles warning is expected and the result is falsy.
          self.DL.expect_warning('video doesn\'t have subtitles')
          self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220'
          params = self.DL.params
          params['writesubtitles'] = True
          params['allsubtitles'] = True
          subtitles = self.getSubtitles()
          self.assertFalse(subtitles)
  class TestLyndaSubtitles(BaseTestSubtitles):
      """Subtitle extraction checks for LyndaIE."""

      url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
      IE = LyndaIE

      def test_allsubtitles(self):
          # Only an 'en' track is expected, pinned by checksum.
          params = self.DL.params
          params['writesubtitles'] = True
          params['allsubtitles'] = True
          subtitles = self.getSubtitles()
          languages = set(subtitles.keys())
          self.assertEqual(languages, set(['en']))
          self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7')
  class TestNPOSubtitles(BaseTestSubtitles):
      """Subtitle extraction checks for NPOIE."""

      url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
      IE = NPOIE

      def test_allsubtitles(self):
          # Only an 'nl' track is expected, pinned by checksum.
          params = self.DL.params
          params['writesubtitles'] = True
          params['allsubtitles'] = True
          subtitles = self.getSubtitles()
          languages = set(subtitles.keys())
          self.assertEqual(languages, set(['nl']))
          self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
  class TestMTVSubtitles(BaseTestSubtitles):
      """Subtitle extraction checks for ComedyCentralIE."""

      url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans'
      IE = ComedyCentralIE

      def getInfoDict(self):
          """Return the first item of the ``'entries'`` list in the extracted result."""
          extracted = super(TestMTVSubtitles, self).getInfoDict()
          return extracted['entries'][0]

      def test_allsubtitles(self):
          # Only an 'en' track is expected, pinned by checksum.
          params = self.DL.params
          params['writesubtitles'] = True
          params['allsubtitles'] = True
          subtitles = self.getSubtitles()
          languages = set(subtitles.keys())
          self.assertEqual(languages, set(['en']))
          self.assertEqual(md5(subtitles['en']), '78206b8d8a0cfa9da64dc026eea48961')
  class TestNRKSubtitles(BaseTestSubtitles):
      """Subtitle extraction checks for NRKTVIE."""

      url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'
      IE = NRKTVIE

      def test_allsubtitles(self):
          # Only a 'no' track is expected, pinned by checksum.
          params = self.DL.params
          params['writesubtitles'] = True
          params['allsubtitles'] = True
          subtitles = self.getSubtitles()
          languages = set(subtitles.keys())
          self.assertEqual(languages, set(['no']))
          self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')
  class TestRaiPlaySubtitles(BaseTestSubtitles):
      """Subtitle extraction checks for RaiPlayIE."""

      url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
      IE = RaiPlayIE

      def test_allsubtitles(self):
          # Only an 'it' track is expected, pinned by checksum.
          params = self.DL.params
          params['writesubtitles'] = True
          params['allsubtitles'] = True
          subtitles = self.getSubtitles()
          languages = set(subtitles.keys())
          self.assertEqual(languages, set(['it']))
          self.assertEqual(md5(subtitles['it']), 'b1d90a98755126b61e667567a1f6680a')
  class TestVikiSubtitles(BaseTestSubtitles):
      """Subtitle extraction checks for VikiIE."""

      url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
      IE = VikiIE

      def test_allsubtitles(self):
          # Only an 'en' track is expected, pinned by checksum.
          params = self.DL.params
          params['writesubtitles'] = True
          params['allsubtitles'] = True
          subtitles = self.getSubtitles()
          languages = set(subtitles.keys())
          self.assertEqual(languages, set(['en']))
          self.assertEqual(md5(subtitles['en']), '53cb083a5914b2d84ef1ab67b880d18a')
  class TestThePlatformSubtitles(BaseTestSubtitles):
      """Subtitle extraction checks for ThePlatformIE."""

      # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
      # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
      url = 'theplatform:JFUjUE1_ehvq'
      IE = ThePlatformIE

      def test_allsubtitles(self):
          # Only an 'en' track is expected, pinned by checksum.
          params = self.DL.params
          params['writesubtitles'] = True
          params['allsubtitles'] = True
          subtitles = self.getSubtitles()
          languages = set(subtitles.keys())
          self.assertEqual(languages, set(['en']))
          self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
  class TestThePlatformFeedSubtitles(BaseTestSubtitles):
      """Subtitle extraction checks for ThePlatformFeedIE."""

      url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
      IE = ThePlatformFeedIE

      def test_allsubtitles(self):
          # Only an 'en' track is expected, pinned by checksum.
          params = self.DL.params
          params['writesubtitles'] = True
          params['allsubtitles'] = True
          subtitles = self.getSubtitles()
          languages = set(subtitles.keys())
          self.assertEqual(languages, set(['en']))
          self.assertEqual(md5(subtitles['en']), '48649a22e82b2da21c9a67a395eedade')
  class TestRtveSubtitles(BaseTestSubtitles):
      """Subtitle extraction checks for RTVEALaCartaIE."""

      url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
      IE = RTVEALaCartaIE

      # Previously this test printed a message and returned early, so it was
      # reported as passed and its assertions were unreachable. Marking it
      # as skipped reports the real state and keeps the body runnable once
      # the decorator is removed.
      @unittest.skip('only available from Spain')
      def test_allsubtitles(self):
          # Only an 'es' track is expected, pinned by checksum.
          self.DL.params['writesubtitles'] = True
          self.DL.params['allsubtitles'] = True
          subtitles = self.getSubtitles()
          self.assertEqual(set(subtitles.keys()), set(['es']))
          self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
  class TestDemocracynowSubtitles(BaseTestSubtitles):
      """Subtitle extraction checks for DemocracynowIE."""

      url = 'http://www.democracynow.org/shows/2015/7/3'
      IE = DemocracynowIE

      def test_allsubtitles(self):
          # Only an 'en' track is expected, pinned by checksum.
          params = self.DL.params
          params['writesubtitles'] = True
          params['allsubtitles'] = True
          subtitles = self.getSubtitles()
          languages = set(subtitles.keys())
          self.assertEqual(languages, set(['en']))
          self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c')

      def test_subtitles_in_page(self):
          # A different URL for which the same single 'en' track and the
          # same checksum as in test_allsubtitles are expected.
          self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree'
          params = self.DL.params
          params['writesubtitles'] = True
          params['allsubtitles'] = True
          subtitles = self.getSubtitles()
          languages = set(subtitles.keys())
          self.assertEqual(languages, set(['en']))
          self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c')
  # Allow direct execution: run every test case in this module.
  if __name__ == '__main__':
      unittest.main()