You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

118 lines
4.6 KiB

11 years ago
12 years ago
12 years ago
12 years ago
10 years ago
11 years ago
12 years ago
12 years ago
11 years ago
11 years ago
11 years ago
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import compat_str
  6. from ..utils import (
  7. ExtractorError,
  8. unified_strdate,
  9. HEADRequest,
  10. )
  11. class WatIE(InfoExtractor):
  12. _VALID_URL = r'(?:wat:|https?://(?:www\.)?wat\.tv/video/.*-)(?P<id>[0-9a-z]+)'
  13. IE_NAME = 'wat.tv'
  14. _TESTS = [
  15. {
  16. 'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
  17. 'md5': '83d882d9de5c9d97f0bb2c6273cde56a',
  18. 'info_dict': {
  19. 'id': '11713067',
  20. 'ext': 'mp4',
  21. 'title': 'Soupe de figues à l\'orange et aux épices',
  22. 'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.',
  23. 'upload_date': '20140819',
  24. 'duration': 120,
  25. },
  26. },
  27. {
  28. 'url': 'http://www.wat.tv/video/gregory-lemarchal-voix-ange-6z1v7_6ygkj_.html',
  29. 'md5': 'fbc84e4378165278e743956d9c1bf16b',
  30. 'info_dict': {
  31. 'id': '11713075',
  32. 'ext': 'mp4',
  33. 'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)',
  34. 'description': 'md5:b7a849cf16a2b733d9cd10c52906dee3',
  35. 'upload_date': '20140816',
  36. 'duration': 2910,
  37. },
  38. 'skip': "Ce contenu n'est pas disponible pour l'instant.",
  39. },
  40. ]
  41. def _real_extract(self, url):
  42. video_id = self._match_id(url)
  43. video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36))
  44. # 'contentv4' is used in the website, but it also returns the related
  45. # videos, we don't need them
  46. video_info = self._download_json(
  47. 'http://www.wat.tv/interface/contentv3/' + video_id, video_id)['media']
  48. error_desc = video_info.get('error_desc')
  49. if error_desc:
  50. raise ExtractorError(
  51. '%s returned error: %s' % (self.IE_NAME, error_desc), expected=True)
  52. chapters = video_info['chapters']
  53. first_chapter = chapters[0]
  54. def video_id_for_chapter(chapter):
  55. return chapter['tc_start'].split('-')[0]
  56. if video_id_for_chapter(first_chapter) != video_id:
  57. self.to_screen('Multipart video detected')
  58. entries = [self.url_result('wat:%s' % video_id_for_chapter(chapter)) for chapter in chapters]
  59. return self.playlist_result(entries, video_id, video_info['title'])
  60. # Otherwise we can continue and extract just one part, we have to use
  61. # the video id for getting the video url
  62. date_diffusion = first_chapter.get('date_diffusion')
  63. upload_date = unified_strdate(date_diffusion) if date_diffusion else None
  64. def extract_url(path_template, url_type):
  65. req_url = 'http://www.wat.tv/get/%s' % (path_template % video_id)
  66. head = self._request_webpage(HEADRequest(req_url), video_id, 'Extracting %s url' % url_type)
  67. red_url = head.geturl()
  68. if req_url == red_url:
  69. raise ExtractorError(
  70. '%s said: Sorry, this video is not available from your country.' % self.IE_NAME,
  71. expected=True)
  72. return red_url
  73. m3u8_url = extract_url('ipad/%s.m3u8', 'm3u8')
  74. http_url = extract_url('android5/%s.mp4', 'http')
  75. formats = []
  76. m3u8_formats = self._extract_m3u8_formats(
  77. m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
  78. formats.extend(m3u8_formats)
  79. formats.extend(self._extract_f4m_formats(
  80. m3u8_url.replace('ios.', 'web.').replace('.m3u8', '.f4m'),
  81. video_id, f4m_id='hds', fatal=False))
  82. for m3u8_format in m3u8_formats:
  83. vbr, abr = m3u8_format.get('vbr'), m3u8_format.get('abr')
  84. if not vbr or not abr:
  85. continue
  86. f = m3u8_format.copy()
  87. f.update({
  88. 'url': re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url),
  89. 'format_id': f['format_id'].replace('hls', 'http'),
  90. 'protocol': 'http',
  91. })
  92. formats.append(f)
  93. self._sort_formats(formats)
  94. return {
  95. 'id': video_id,
  96. 'title': first_chapter['title'],
  97. 'thumbnail': first_chapter['preview'],
  98. 'description': first_chapter['description'],
  99. 'view_count': video_info['views'],
  100. 'upload_date': upload_date,
  101. 'duration': video_info['files'][0]['duration'],
  102. 'formats': formats,
  103. }