You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

137 lines
5.2 KiB

11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
10 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. import hashlib
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. ExtractorError,
  8. unified_strdate,
  9. )
  class WatIE(InfoExtractor):
      """Extractor for video pages on www.wat.tv.

      The page URL carries a display id and a short id.  The "real id" used by
      the JSON interface is scraped from the downloaded page.  When the first
      chapter of the video resolves to a different real id, the video is
      treated as multipart and returned as a playlist of its chapters;
      otherwise a single info dict with its formats is returned.
      """

      _VALID_URL = r'http://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html'
      IE_NAME = 'wat.tv'

      _TESTS = [
          {
              'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
              'md5': 'ce70e9223945ed26a8056d413ca55dc9',
              'info_dict': {
                  'id': '11713067',
                  'display_id': 'soupe-figues-l-orange-aux-epices',
                  'ext': 'mp4',
                  'title': 'Soupe de figues à l\'orange et aux épices',
                  'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.',
                  'upload_date': '20140819',
                  'duration': 120,
              },
          },
          {
              'url': 'http://www.wat.tv/video/gregory-lemarchal-voix-ange-6z1v7_6ygkj_.html',
              'md5': 'fbc84e4378165278e743956d9c1bf16b',
              'info_dict': {
                  'id': '11713075',
                  'display_id': 'gregory-lemarchal-voix-ange',
                  'ext': 'mp4',
                  'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)',
                  'description': 'md5:b7a849cf16a2b733d9cd10c52906dee3',
                  'upload_date': '20140816',
                  'duration': 2910,
              },
              'skip': "Ce contenu n'est pas disponible pour l'instant.",
          },
      ]

      def download_video_info(self, real_id):
          """Download the JSON description of ``real_id`` and return its 'media' entry."""
          # 'contentv4' is used in the website, but it also returns the related
          # videos, we don't need them
          info = self._download_json(
              'http://www.wat.tv/interface/contentv3/' + real_id, real_id)
          return info['media']

      def _real_extract(self, url):
          """Extract ``url`` into an info dict, or a playlist for multipart videos.

          Raises ExtractorError (marked as expected) when the JSON interface
          reports an 'error_desc' for the video.
          """
          def real_id_for_chapter(chapter):
              # The part of 'tc_start' before the first dash is treated as the
              # chapter's real id.
              return chapter['tc_start'].split('-')[0]

          mobj = re.match(self._VALID_URL, url)
          short_id = mobj.group('short_id')
          display_id = mobj.group('display_id')

          webpage = self._download_webpage(url, display_id or short_id)
          real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id')

          video_info = self.download_video_info(real_id)

          error_desc = video_info.get('error_desc')
          if error_desc:
              raise ExtractorError(
                  '%s returned error: %s' % (self.IE_NAME, error_desc), expected=True)

          # The first 'geoList' entry (if any) is sent as the 'country'
          # parameter of the video URL requests below.
          geo_list = video_info.get('geoList')
          country = geo_list[0] if geo_list else ''

          chapters = video_info['chapters']
          first_chapter = chapters[0]
          files = video_info['files']
          first_file = files[0]

          if real_id_for_chapter(first_chapter) != real_id:
              self.to_screen('Multipart video detected')
              entries = []
              for chapter in chapters:
                  chapter_id = real_id_for_chapter(chapter)
                  # Yes, when this chapter is processed by WatIE,
                  # it will download the info again
                  chapter_info = self.download_video_info(chapter_id)
                  entries.append(self.url_result(chapter_info['url']))
              return self.playlist_result(entries, real_id, video_info['title'])

          # Otherwise we can continue and extract just one part.

          # The broadcast date is optional metadata: tolerate a missing or
          # empty value instead of failing the whole extraction.
          date_diffusion = first_chapter.get('date_diffusion')
          upload_date = unified_strdate(date_diffusion) if date_diffusion else None

          # The video URLs below are all built from the real id.
          formats = [{
              'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
              'format_id': 'Mobile',
          }]

          # (format id, path component used in the 'get' URL)
          fmts = [('SD', 'web')]
          if first_file.get('hasHD'):
              fmts.append(('HD', 'webhd'))

          def compute_token(param):
              # The token is '<md5 hex digest>/<timestamp>': the timestamp is the
              # first '|'-separated field of the server time response rendered
              # as 8-digit zero-padded hex, and the digest covers
              # magic + param + timestamp.
              timestamp = '%08x' % int(self._download_webpage(
                  'http://www.wat.tv/servertime', real_id,
                  'Downloading server time').split('|')[0])
              magic = '9b673b13fa4682ed14c3cfa5af5310274b514c4133e9b3a81e6e3aba009l2564'
              return '%s/%s' % (hashlib.md5((magic + param + timestamp).encode('ascii')).hexdigest(), timestamp)

          for format_id, path_name in fmts:
              webid = '/%s/%s' % (path_name, real_id)
              # NOTE(review): the trailing positional False is presumably the
              # `fatal` flag of _download_webpage, so a failed request yields a
              # falsy result instead of raising - confirm against InfoExtractor.
              video_url = self._download_webpage(
                  'http://www.wat.tv/get%s?token=%s&getURL=1&country=%s' % (webid, compute_token(webid), country),
                  real_id,
                  'Downloading %s video URL' % format_id,
                  'Failed to download %s video URL' % format_id,
                  False)
              if not video_url:
                  continue
              formats.append({
                  'url': video_url,
                  'ext': 'mp4',
                  'format_id': format_id,
              })

          # Only the title is required here; the remaining metadata is read
          # with .get() so that one missing key does not abort the extraction.
          return {
              'id': real_id,
              'display_id': display_id,
              'title': first_chapter['title'],
              'thumbnail': first_chapter.get('preview'),
              'description': first_chapter.get('description'),
              'view_count': video_info.get('views'),
              'upload_date': upload_date,
              'duration': first_file.get('duration'),
              'formats': formats,
          }