You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

371 lines
14 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. #!/usr/bin/env python
  2. from __future__ import unicode_literals
  3. # Allow direct execution
  4. import os
  5. import sys
  6. import unittest
  7. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  8. from test.helper import FakeYDL, md5
  9. from youtube_dl.extractor import (
  10. BlipTVIE,
  11. YoutubeIE,
  12. DailymotionIE,
  13. TEDIE,
  14. VimeoIE,
  15. WallaIE,
  16. CeskaTelevizeIE,
  17. LyndaIE,
  18. NPOIE,
  19. ComedyCentralIE,
  20. NRKTVIE,
  21. RaiIE,
  22. VikiIE,
  23. ThePlatformIE,
  24. ThePlatformFeedIE,
  25. RTVEALaCartaIE,
  26. FunnyOrDieIE,
  27. DemocracynowIE,
  28. )
  29. class BaseTestSubtitles(unittest.TestCase):
  30. url = None
  31. IE = None
  32. def setUp(self):
  33. self.DL = FakeYDL()
  34. self.ie = self.IE()
  35. self.DL.add_info_extractor(self.ie)
  36. def getInfoDict(self):
  37. info_dict = self.DL.extract_info(self.url, download=False)
  38. return info_dict
  39. def getSubtitles(self):
  40. info_dict = self.getInfoDict()
  41. subtitles = info_dict['requested_subtitles']
  42. if not subtitles:
  43. return subtitles
  44. for sub_info in subtitles.values():
  45. if sub_info.get('data') is None:
  46. uf = self.DL.urlopen(sub_info['url'])
  47. sub_info['data'] = uf.read().decode('utf-8')
  48. return dict((l, sub_info['data']) for l, sub_info in subtitles.items())
  49. class TestYoutubeSubtitles(BaseTestSubtitles):
  50. url = 'QRS8MkLhQmM'
  51. IE = YoutubeIE
  52. def test_youtube_allsubtitles(self):
  53. self.DL.params['writesubtitles'] = True
  54. self.DL.params['allsubtitles'] = True
  55. subtitles = self.getSubtitles()
  56. self.assertEqual(len(subtitles.keys()), 13)
  57. self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
  58. self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
  59. for lang in ['it', 'fr', 'de']:
  60. self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
  61. def test_youtube_subtitles_sbv_format(self):
  62. self.DL.params['writesubtitles'] = True
  63. self.DL.params['subtitlesformat'] = 'sbv'
  64. subtitles = self.getSubtitles()
  65. self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')
  66. def test_youtube_subtitles_vtt_format(self):
  67. self.DL.params['writesubtitles'] = True
  68. self.DL.params['subtitlesformat'] = 'vtt'
  69. subtitles = self.getSubtitles()
  70. self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
  71. def test_youtube_automatic_captions(self):
  72. self.url = '8YoUxe5ncPo'
  73. self.DL.params['writeautomaticsub'] = True
  74. self.DL.params['subtitleslangs'] = ['it']
  75. subtitles = self.getSubtitles()
  76. self.assertTrue(subtitles['it'] is not None)
  77. def test_youtube_translated_subtitles(self):
  78. # This video has a subtitles track, which can be translated
  79. self.url = 'Ky9eprVWzlI'
  80. self.DL.params['writeautomaticsub'] = True
  81. self.DL.params['subtitleslangs'] = ['it']
  82. subtitles = self.getSubtitles()
  83. self.assertTrue(subtitles['it'] is not None)
  84. def test_youtube_nosubtitles(self):
  85. self.DL.expect_warning('video doesn\'t have subtitles')
  86. self.url = 'n5BB19UTcdA'
  87. self.DL.params['writesubtitles'] = True
  88. self.DL.params['allsubtitles'] = True
  89. subtitles = self.getSubtitles()
  90. self.assertFalse(subtitles)
  91. class TestDailymotionSubtitles(BaseTestSubtitles):
  92. url = 'http://www.dailymotion.com/video/xczg00'
  93. IE = DailymotionIE
  94. def test_allsubtitles(self):
  95. self.DL.params['writesubtitles'] = True
  96. self.DL.params['allsubtitles'] = True
  97. subtitles = self.getSubtitles()
  98. self.assertTrue(len(subtitles.keys()) >= 6)
  99. self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
  100. self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
  101. for lang in ['es', 'fr', 'de']:
  102. self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
  103. def test_nosubtitles(self):
  104. self.DL.expect_warning('video doesn\'t have subtitles')
  105. self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
  106. self.DL.params['writesubtitles'] = True
  107. self.DL.params['allsubtitles'] = True
  108. subtitles = self.getSubtitles()
  109. self.assertFalse(subtitles)
  110. class TestTedSubtitles(BaseTestSubtitles):
  111. url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
  112. IE = TEDIE
  113. def test_allsubtitles(self):
  114. self.DL.params['writesubtitles'] = True
  115. self.DL.params['allsubtitles'] = True
  116. subtitles = self.getSubtitles()
  117. self.assertTrue(len(subtitles.keys()) >= 28)
  118. self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
  119. self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
  120. for lang in ['es', 'fr', 'de']:
  121. self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
  122. class TestBlipTVSubtitles(BaseTestSubtitles):
  123. url = 'http://blip.tv/a/a-6603250'
  124. IE = BlipTVIE
  125. def test_allsubtitles(self):
  126. self.DL.params['writesubtitles'] = True
  127. self.DL.params['allsubtitles'] = True
  128. subtitles = self.getSubtitles()
  129. self.assertEqual(set(subtitles.keys()), set(['en']))
  130. self.assertEqual(md5(subtitles['en']), '5b75c300af65fe4476dff79478bb93e4')
  131. class TestVimeoSubtitles(BaseTestSubtitles):
  132. url = 'http://vimeo.com/76979871'
  133. IE = VimeoIE
  134. def test_allsubtitles(self):
  135. self.DL.params['writesubtitles'] = True
  136. self.DL.params['allsubtitles'] = True
  137. subtitles = self.getSubtitles()
  138. self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))
  139. self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
  140. self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
  141. def test_nosubtitles(self):
  142. self.DL.expect_warning('video doesn\'t have subtitles')
  143. self.url = 'http://vimeo.com/56015672'
  144. self.DL.params['writesubtitles'] = True
  145. self.DL.params['allsubtitles'] = True
  146. subtitles = self.getSubtitles()
  147. self.assertFalse(subtitles)
  148. class TestWallaSubtitles(BaseTestSubtitles):
  149. url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
  150. IE = WallaIE
  151. def test_allsubtitles(self):
  152. self.DL.expect_warning('Automatic Captions not supported by this server')
  153. self.DL.params['writesubtitles'] = True
  154. self.DL.params['allsubtitles'] = True
  155. subtitles = self.getSubtitles()
  156. self.assertEqual(set(subtitles.keys()), set(['heb']))
  157. self.assertEqual(md5(subtitles['heb']), 'e758c5d7cb982f6bef14f377ec7a3920')
  158. def test_nosubtitles(self):
  159. self.DL.expect_warning('video doesn\'t have subtitles')
  160. self.url = 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one'
  161. self.DL.params['writesubtitles'] = True
  162. self.DL.params['allsubtitles'] = True
  163. subtitles = self.getSubtitles()
  164. self.assertFalse(subtitles)
  165. class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
  166. url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
  167. IE = CeskaTelevizeIE
  168. def test_allsubtitles(self):
  169. self.DL.expect_warning('Automatic Captions not supported by this server')
  170. self.DL.params['writesubtitles'] = True
  171. self.DL.params['allsubtitles'] = True
  172. subtitles = self.getSubtitles()
  173. self.assertEqual(set(subtitles.keys()), set(['cs']))
  174. self.assertTrue(len(subtitles['cs']) > 20000)
  175. def test_nosubtitles(self):
  176. self.DL.expect_warning('video doesn\'t have subtitles')
  177. self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220'
  178. self.DL.params['writesubtitles'] = True
  179. self.DL.params['allsubtitles'] = True
  180. subtitles = self.getSubtitles()
  181. self.assertFalse(subtitles)
  182. class TestLyndaSubtitles(BaseTestSubtitles):
  183. url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
  184. IE = LyndaIE
  185. def test_allsubtitles(self):
  186. self.DL.params['writesubtitles'] = True
  187. self.DL.params['allsubtitles'] = True
  188. subtitles = self.getSubtitles()
  189. self.assertEqual(set(subtitles.keys()), set(['en']))
  190. self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7')
  191. class TestNPOSubtitles(BaseTestSubtitles):
  192. url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
  193. IE = NPOIE
  194. def test_allsubtitles(self):
  195. self.DL.params['writesubtitles'] = True
  196. self.DL.params['allsubtitles'] = True
  197. subtitles = self.getSubtitles()
  198. self.assertEqual(set(subtitles.keys()), set(['nl']))
  199. self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
  200. class TestMTVSubtitles(BaseTestSubtitles):
  201. url = 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother'
  202. IE = ComedyCentralIE
  203. def getInfoDict(self):
  204. return super(TestMTVSubtitles, self).getInfoDict()['entries'][0]
  205. def test_allsubtitles(self):
  206. self.DL.params['writesubtitles'] = True
  207. self.DL.params['allsubtitles'] = True
  208. subtitles = self.getSubtitles()
  209. self.assertEqual(set(subtitles.keys()), set(['en']))
  210. self.assertEqual(md5(subtitles['en']), 'b9f6ca22a6acf597ec76f61749765e65')
  211. class TestNRKSubtitles(BaseTestSubtitles):
  212. url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'
  213. IE = NRKTVIE
  214. def test_allsubtitles(self):
  215. self.DL.params['writesubtitles'] = True
  216. self.DL.params['allsubtitles'] = True
  217. subtitles = self.getSubtitles()
  218. self.assertEqual(set(subtitles.keys()), set(['no']))
  219. self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')
  220. class TestRaiSubtitles(BaseTestSubtitles):
  221. url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
  222. IE = RaiIE
  223. def test_allsubtitles(self):
  224. self.DL.params['writesubtitles'] = True
  225. self.DL.params['allsubtitles'] = True
  226. subtitles = self.getSubtitles()
  227. self.assertEqual(set(subtitles.keys()), set(['it']))
  228. self.assertEqual(md5(subtitles['it']), 'b1d90a98755126b61e667567a1f6680a')
  229. class TestVikiSubtitles(BaseTestSubtitles):
  230. url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
  231. IE = VikiIE
  232. def test_allsubtitles(self):
  233. self.DL.params['writesubtitles'] = True
  234. self.DL.params['allsubtitles'] = True
  235. subtitles = self.getSubtitles()
  236. self.assertEqual(set(subtitles.keys()), set(['en']))
  237. self.assertEqual(md5(subtitles['en']), '53cb083a5914b2d84ef1ab67b880d18a')
  238. class TestThePlatformSubtitles(BaseTestSubtitles):
  239. # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
  240. # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
  241. url = 'theplatform:JFUjUE1_ehvq'
  242. IE = ThePlatformIE
  243. def test_allsubtitles(self):
  244. self.DL.params['writesubtitles'] = True
  245. self.DL.params['allsubtitles'] = True
  246. subtitles = self.getSubtitles()
  247. self.assertEqual(set(subtitles.keys()), set(['en']))
  248. self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
  249. class TestThePlatformFeedSubtitles(BaseTestSubtitles):
  250. url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
  251. IE = ThePlatformFeedIE
  252. def test_allsubtitles(self):
  253. self.DL.params['writesubtitles'] = True
  254. self.DL.params['allsubtitles'] = True
  255. subtitles = self.getSubtitles()
  256. self.assertEqual(set(subtitles.keys()), set(['en']))
  257. self.assertEqual(md5(subtitles['en']), '48649a22e82b2da21c9a67a395eedade')
  258. class TestRtveSubtitles(BaseTestSubtitles):
  259. url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
  260. IE = RTVEALaCartaIE
  261. def test_allsubtitles(self):
  262. print('Skipping, only available from Spain')
  263. return
  264. self.DL.params['writesubtitles'] = True
  265. self.DL.params['allsubtitles'] = True
  266. subtitles = self.getSubtitles()
  267. self.assertEqual(set(subtitles.keys()), set(['es']))
  268. self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
  269. class TestFunnyOrDieSubtitles(BaseTestSubtitles):
  270. url = 'http://www.funnyordie.com/videos/224829ff6d/judd-apatow-will-direct-your-vine'
  271. IE = FunnyOrDieIE
  272. def test_allsubtitles(self):
  273. self.DL.params['writesubtitles'] = True
  274. self.DL.params['allsubtitles'] = True
  275. subtitles = self.getSubtitles()
  276. self.assertEqual(set(subtitles.keys()), set(['en']))
  277. self.assertEqual(md5(subtitles['en']), 'c5593c193eacd353596c11c2d4f9ecc4')
  278. class TestDemocracynowSubtitles(BaseTestSubtitles):
  279. url = 'http://www.democracynow.org/shows/2015/7/3'
  280. IE = DemocracynowIE
  281. def test_allsubtitles(self):
  282. self.DL.params['writesubtitles'] = True
  283. self.DL.params['allsubtitles'] = True
  284. subtitles = self.getSubtitles()
  285. self.assertEqual(set(subtitles.keys()), set(['en']))
  286. self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c')
  287. def test_subtitles_in_page(self):
  288. self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree'
  289. self.DL.params['writesubtitles'] = True
  290. self.DL.params['allsubtitles'] = True
  291. subtitles = self.getSubtitles()
  292. self.assertEqual(set(subtitles.keys()), set(['en']))
  293. self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c')
  294. if __name__ == '__main__':
  295. unittest.main()