You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

128 lines
5.0 KiB

11 years ago
11 years ago
11 years ago
11 years ago
10 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import compat_str
  6. from ..utils import (
  7. ExtractorError,
  8. unified_strdate,
  9. HEADRequest,
  10. float_or_none,
  11. )
  12. class WatIE(InfoExtractor):
  13. _VALID_URL = r'(?:wat:|https?://(?:www\.)?wat\.tv/video/.*-)(?P<id>[0-9a-z]+)'
  14. IE_NAME = 'wat.tv'
  15. _TESTS = [
  16. {
  17. 'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
  18. 'md5': '83d882d9de5c9d97f0bb2c6273cde56a',
  19. 'info_dict': {
  20. 'id': '11713067',
  21. 'ext': 'mp4',
  22. 'title': 'Soupe de figues à l\'orange et aux épices',
  23. 'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.',
  24. 'upload_date': '20140819',
  25. 'duration': 120,
  26. },
  27. },
  28. {
  29. 'url': 'http://www.wat.tv/video/gregory-lemarchal-voix-ange-6z1v7_6ygkj_.html',
  30. 'md5': 'fbc84e4378165278e743956d9c1bf16b',
  31. 'info_dict': {
  32. 'id': '11713075',
  33. 'ext': 'mp4',
  34. 'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)',
  35. 'description': 'md5:b7a849cf16a2b733d9cd10c52906dee3',
  36. 'upload_date': '20140816',
  37. 'duration': 2910,
  38. },
  39. 'skip': "Ce contenu n'est pas disponible pour l'instant.",
  40. },
  41. ]
  42. def _real_extract(self, url):
  43. video_id = self._match_id(url)
  44. video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36))
  45. # 'contentv4' is used in the website, but it also returns the related
  46. # videos, we don't need them
  47. video_info = self._download_json(
  48. 'http://www.wat.tv/interface/contentv3/' + video_id, video_id)['media']
  49. error_desc = video_info.get('error_desc')
  50. if error_desc:
  51. raise ExtractorError(
  52. '%s returned error: %s' % (self.IE_NAME, error_desc), expected=True)
  53. chapters = video_info['chapters']
  54. first_chapter = chapters[0]
  55. def video_id_for_chapter(chapter):
  56. return chapter['tc_start'].split('-')[0]
  57. if video_id_for_chapter(first_chapter) != video_id:
  58. self.to_screen('Multipart video detected')
  59. entries = [self.url_result('wat:%s' % video_id_for_chapter(chapter)) for chapter in chapters]
  60. return self.playlist_result(entries, video_id, video_info['title'])
  61. # Otherwise we can continue and extract just one part, we have to use
  62. # the video id for getting the video url
  63. date_diffusion = first_chapter.get('date_diffusion')
  64. upload_date = unified_strdate(date_diffusion) if date_diffusion else None
  65. def extract_url(path_template, url_type):
  66. req_url = 'http://www.wat.tv/get/%s' % (path_template % video_id)
  67. head = self._request_webpage(HEADRequest(req_url), video_id, 'Extracting %s url' % url_type)
  68. red_url = head.geturl()
  69. if req_url == red_url:
  70. raise ExtractorError(
  71. '%s said: Sorry, this video is not available from your country.' % self.IE_NAME,
  72. expected=True)
  73. return red_url
  74. m3u8_url = extract_url('ipad/%s.m3u8', 'm3u8')
  75. http_url = extract_url('android5/%s.mp4', 'http')
  76. formats = []
  77. m3u8_formats = self._extract_m3u8_formats(
  78. m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
  79. formats.extend(m3u8_formats)
  80. formats.extend(self._extract_f4m_formats(
  81. m3u8_url.replace('ios.', 'web.').replace('.m3u8', '.f4m'),
  82. video_id, f4m_id='hds', fatal=False))
  83. for m3u8_format in m3u8_formats:
  84. mobj = re.search(
  85. r'audio.*?%3D(\d+)(?:-video.*?%3D(\d+))?', m3u8_format['url'])
  86. if not mobj:
  87. continue
  88. abr, vbr = mobj.groups()
  89. abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
  90. m3u8_format.update({
  91. 'vbr': vbr,
  92. 'abr': abr,
  93. })
  94. if not vbr or not abr:
  95. continue
  96. f = m3u8_format.copy()
  97. f.update({
  98. 'url': re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url),
  99. 'format_id': f['format_id'].replace('hls', 'http'),
  100. 'protocol': 'http',
  101. })
  102. formats.append(f)
  103. self._sort_formats(formats)
  104. return {
  105. 'id': video_id,
  106. 'title': first_chapter['title'],
  107. 'thumbnail': first_chapter['preview'],
  108. 'description': first_chapter['description'],
  109. 'view_count': video_info['views'],
  110. 'upload_date': upload_date,
  111. 'duration': video_info['files'][0]['duration'],
  112. 'formats': formats,
  113. }