You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

157 lines
6.1 KiB

11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import compat_str
  6. from ..utils import (
  7. ExtractorError,
  8. unified_strdate,
  9. HEADRequest,
  10. int_or_none,
  11. )
  12. class WatIE(InfoExtractor):
  13. _VALID_URL = r'(?:wat:|https?://(?:www\.)?wat\.tv/video/.*-)(?P<id>[0-9a-z]+)'
  14. IE_NAME = 'wat.tv'
  15. _TESTS = [
  16. {
  17. 'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
  18. 'info_dict': {
  19. 'id': '11713067',
  20. 'ext': 'mp4',
  21. 'title': 'Soupe de figues à l\'orange et aux épices',
  22. 'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.',
  23. 'upload_date': '20140819',
  24. 'duration': 120,
  25. },
  26. 'params': {
  27. # m3u8 download
  28. 'skip_download': True,
  29. },
  30. 'expected_warnings': ['HTTP Error 404'],
  31. },
  32. {
  33. 'url': 'http://www.wat.tv/video/gregory-lemarchal-voix-ange-6z1v7_6ygkj_.html',
  34. 'md5': 'b16574df2c3cd1a36ca0098f2a791925',
  35. 'info_dict': {
  36. 'id': '11713075',
  37. 'ext': 'mp4',
  38. 'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)',
  39. 'upload_date': '20140816',
  40. },
  41. 'expected_warnings': ["Ce contenu n'est pas disponible pour l'instant."],
  42. },
  43. ]
  44. _FORMATS = (
  45. (200, 416, 234),
  46. (400, 480, 270),
  47. (600, 640, 360),
  48. (1200, 640, 360),
  49. (1800, 960, 540),
  50. (2500, 1280, 720),
  51. )
  52. def _real_extract(self, url):
  53. video_id = self._match_id(url)
  54. video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36))
  55. # 'contentv4' is used in the website, but it also returns the related
  56. # videos, we don't need them
  57. video_data = self._download_json(
  58. 'http://www.wat.tv/interface/contentv4s/' + video_id, video_id)
  59. video_info = video_data['media']
  60. error_desc = video_info.get('error_desc')
  61. if error_desc:
  62. self.report_warning(
  63. '%s returned error: %s' % (self.IE_NAME, error_desc))
  64. chapters = video_info['chapters']
  65. if chapters:
  66. first_chapter = chapters[0]
  67. def video_id_for_chapter(chapter):
  68. return chapter['tc_start'].split('-')[0]
  69. if video_id_for_chapter(first_chapter) != video_id:
  70. self.to_screen('Multipart video detected')
  71. entries = [self.url_result('wat:%s' % video_id_for_chapter(chapter)) for chapter in chapters]
  72. return self.playlist_result(entries, video_id, video_info['title'])
  73. # Otherwise we can continue and extract just one part, we have to use
  74. # the video id for getting the video url
  75. else:
  76. first_chapter = video_info
  77. title = first_chapter['title']
  78. def extract_url(path_template, url_type):
  79. req_url = 'http://www.wat.tv/get/%s' % (path_template % video_id)
  80. head = self._request_webpage(HEADRequest(req_url), video_id, 'Extracting %s url' % url_type, fatal=False)
  81. if head:
  82. red_url = head.geturl()
  83. if req_url != red_url:
  84. return red_url
  85. return None
  86. def remove_bitrate_limit(manifest_url):
  87. return re.sub(r'(?:max|min)_bitrate=\d+&?', '', manifest_url)
  88. formats = []
  89. try:
  90. alt_urls = lambda manifest_url: [re.sub(r'(?:wdv|ssm)?\.ism/', repl + '.ism/', manifest_url) for repl in ('', 'ssm')]
  91. manifest_urls = self._download_json(
  92. 'http://www.wat.tv/get/webhtml/' + video_id, video_id)
  93. m3u8_url = manifest_urls.get('hls')
  94. if m3u8_url:
  95. m3u8_url = remove_bitrate_limit(m3u8_url)
  96. for m3u8_alt_url in alt_urls(m3u8_url):
  97. formats.extend(self._extract_m3u8_formats(
  98. m3u8_alt_url, video_id, 'mp4',
  99. 'm3u8_native', m3u8_id='hls', fatal=False))
  100. formats.extend(self._extract_f4m_formats(
  101. m3u8_alt_url.replace('ios', 'web').replace('.m3u8', '.f4m'),
  102. video_id, f4m_id='hds', fatal=False))
  103. mpd_url = manifest_urls.get('mpd')
  104. if mpd_url:
  105. mpd_url = remove_bitrate_limit(mpd_url)
  106. for mpd_alt_url in alt_urls(mpd_url):
  107. formats.extend(self._extract_mpd_formats(
  108. mpd_alt_url, video_id, mpd_id='dash', fatal=False))
  109. self._sort_formats(formats)
  110. except ExtractorError:
  111. abr = 64
  112. for vbr, width, height in self._FORMATS:
  113. tbr = vbr + abr
  114. format_id = 'http-%s' % tbr
  115. fmt_url = 'http://dnl.adv.tf1.fr/2/USP-0x0/%s/%s/%s/ssm/%s-%s-64k.mp4' % (video_id[-4:-2], video_id[-2:], video_id, video_id, vbr)
  116. if self._is_valid_url(fmt_url, video_id, format_id):
  117. formats.append({
  118. 'format_id': format_id,
  119. 'url': fmt_url,
  120. 'vbr': vbr,
  121. 'abr': abr,
  122. 'width': width,
  123. 'height': height,
  124. })
  125. date_diffusion = first_chapter.get('date_diffusion') or video_data.get('configv4', {}).get('estatS4')
  126. upload_date = unified_strdate(date_diffusion) if date_diffusion else None
  127. duration = None
  128. files = video_info['files']
  129. if files:
  130. duration = int_or_none(files[0].get('duration'))
  131. return {
  132. 'id': video_id,
  133. 'title': title,
  134. 'thumbnail': first_chapter.get('preview'),
  135. 'description': first_chapter.get('description'),
  136. 'view_count': int_or_none(video_info.get('views')),
  137. 'upload_date': upload_date,
  138. 'duration': duration,
  139. 'formats': formats,
  140. }