You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

166 lines
6.7 KiB

11 years ago
12 years ago
12 years ago
12 years ago
11 years ago
12 years ago
12 years ago
11 years ago
11 years ago
11 years ago
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import compat_str
  6. from ..utils import (
  7. ExtractorError,
  8. unified_strdate,
  9. HEADRequest,
  10. int_or_none,
  11. )
  12. class WatIE(InfoExtractor):
  13. _VALID_URL = r'(?:wat:|https?://(?:www\.)?wat\.tv/video/.*-)(?P<id>[0-9a-z]+)'
  14. IE_NAME = 'wat.tv'
  15. _TESTS = [
  16. {
  17. 'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
  18. 'md5': '83d882d9de5c9d97f0bb2c6273cde56a',
  19. 'info_dict': {
  20. 'id': '11713067',
  21. 'ext': 'mp4',
  22. 'title': 'Soupe de figues à l\'orange et aux épices',
  23. 'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.',
  24. 'upload_date': '20140819',
  25. 'duration': 120,
  26. },
  27. },
  28. {
  29. 'url': 'http://www.wat.tv/video/gregory-lemarchal-voix-ange-6z1v7_6ygkj_.html',
  30. 'md5': '34bdfa5ca9fd3c7eb88601b635b0424c',
  31. 'info_dict': {
  32. 'id': '11713075',
  33. 'ext': 'mp4',
  34. 'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)',
  35. 'upload_date': '20140816',
  36. },
  37. 'expected_warnings': ["Ce contenu n'est pas disponible pour l'instant."],
  38. },
  39. ]
  40. _FORMATS = (
  41. (200, 416, 234),
  42. (400, 480, 270),
  43. (600, 640, 360),
  44. (1200, 640, 360),
  45. (1800, 960, 540),
  46. (2500, 1280, 720),
  47. )
  48. def _real_extract(self, url):
  49. video_id = self._match_id(url)
  50. video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36))
  51. # 'contentv4' is used in the website, but it also returns the related
  52. # videos, we don't need them
  53. video_data = self._download_json(
  54. 'http://www.wat.tv/interface/contentv4s/' + video_id, video_id)
  55. video_info = video_data['media']
  56. error_desc = video_info.get('error_desc')
  57. if error_desc:
  58. self.report_warning(
  59. '%s returned error: %s' % (self.IE_NAME, error_desc))
  60. chapters = video_info['chapters']
  61. if chapters:
  62. first_chapter = chapters[0]
  63. def video_id_for_chapter(chapter):
  64. return chapter['tc_start'].split('-')[0]
  65. if video_id_for_chapter(first_chapter) != video_id:
  66. self.to_screen('Multipart video detected')
  67. entries = [self.url_result('wat:%s' % video_id_for_chapter(chapter)) for chapter in chapters]
  68. return self.playlist_result(entries, video_id, video_info['title'])
  69. # Otherwise we can continue and extract just one part, we have to use
  70. # the video id for getting the video url
  71. else:
  72. first_chapter = video_info
  73. title = first_chapter['title']
  74. def extract_url(path_template, url_type):
  75. req_url = 'http://www.wat.tv/get/%s' % (path_template % video_id)
  76. head = self._request_webpage(HEADRequest(req_url), video_id, 'Extracting %s url' % url_type, fatal=False)
  77. if head:
  78. red_url = head.geturl()
  79. if req_url != red_url:
  80. return red_url
  81. return None
  82. def remove_bitrate_limit(manifest_url):
  83. return re.sub(r'(?:max|min)_bitrate=\d+&?', '', manifest_url)
  84. formats = []
  85. try:
  86. manifest_urls = self._download_json(
  87. 'http://www.wat.tv/get/webhtml/' + video_id, video_id)
  88. m3u8_url = manifest_urls.get('hls')
  89. if m3u8_url:
  90. m3u8_url = remove_bitrate_limit(m3u8_url)
  91. m3u8_formats = self._extract_m3u8_formats(
  92. m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
  93. if m3u8_formats:
  94. formats.extend(m3u8_formats)
  95. formats.extend(self._extract_f4m_formats(
  96. m3u8_url.replace('ios', 'web').replace('.m3u8', '.f4m'),
  97. video_id, f4m_id='hds', fatal=False))
  98. http_url = extract_url('android5/%s.mp4', 'http')
  99. if http_url:
  100. for m3u8_format in m3u8_formats:
  101. vbr, abr = m3u8_format.get('vbr'), m3u8_format.get('abr')
  102. if not vbr or not abr:
  103. continue
  104. format_id = m3u8_format['format_id'].replace('hls', 'http')
  105. fmt_url = re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url)
  106. if self._is_valid_url(fmt_url, video_id, format_id):
  107. f = m3u8_format.copy()
  108. f.update({
  109. 'url': fmt_url,
  110. 'format_id': format_id,
  111. 'protocol': 'http',
  112. })
  113. formats.append(f)
  114. mpd_url = manifest_urls.get('mpd')
  115. if mpd_url:
  116. formats.extend(self._extract_mpd_formats(remove_bitrate_limit(
  117. mpd_url), video_id, mpd_id='dash', fatal=False))
  118. self._sort_formats(formats)
  119. except ExtractorError:
  120. abr = 64
  121. for vbr, width, height in self._FORMATS:
  122. tbr = vbr + abr
  123. format_id = 'http-%s' % tbr
  124. fmt_url = 'http://dnl.adv.tf1.fr/2/USP-0x0/%s/%s/%s/ssm/%s-%s-64k.mp4' % (video_id[-4:-2], video_id[-2:], video_id, video_id, vbr)
  125. if self._is_valid_url(fmt_url, video_id, format_id):
  126. formats.append({
  127. 'format_id': format_id,
  128. 'url': fmt_url,
  129. 'vbr': vbr,
  130. 'abr': abr,
  131. 'width': width,
  132. 'height': height,
  133. })
  134. date_diffusion = first_chapter.get('date_diffusion') or video_data.get('configv4', {}).get('estatS4')
  135. upload_date = unified_strdate(date_diffusion) if date_diffusion else None
  136. duration = None
  137. files = video_info['files']
  138. if files:
  139. duration = int_or_none(files[0].get('duration'))
  140. return {
  141. 'id': video_id,
  142. 'title': title,
  143. 'thumbnail': first_chapter.get('preview'),
  144. 'description': first_chapter.get('description'),
  145. 'view_count': int_or_none(video_info.get('views')),
  146. 'upload_date': upload_date,
  147. 'duration': duration,
  148. 'formats': formats,
  149. }