You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

210 lines
7.8 KiB

10 years ago
11 years ago
11 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
11 years ago
10 years ago
10 years ago
10 years ago
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. import json
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. ExtractorError,
  8. int_or_none,
  9. qualities,
  10. )
  11. class IviIE(InfoExtractor):
  12. IE_DESC = 'ivi.ru'
  13. IE_NAME = 'ivi'
  14. _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
  15. _TESTS = [
  16. # Single movie
  17. {
  18. 'url': 'http://www.ivi.ru/watch/53141',
  19. 'md5': '6ff5be2254e796ed346251d117196cf4',
  20. 'info_dict': {
  21. 'id': '53141',
  22. 'ext': 'mp4',
  23. 'title': 'Иван Васильевич меняет профессию',
  24. 'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
  25. 'duration': 5498,
  26. 'thumbnail': 're:^https?://.*\.jpg$',
  27. },
  28. 'skip': 'Only works from Russia',
  29. },
  30. # Serial's series
  31. {
  32. 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/9549',
  33. 'md5': '221f56b35e3ed815fde2df71032f4b3e',
  34. 'info_dict': {
  35. 'id': '9549',
  36. 'ext': 'mp4',
  37. 'title': 'Двое из ларца - Дело Гольдберга (1 часть)',
  38. 'series': 'Двое из ларца',
  39. 'season': 'Сезон 1',
  40. 'season_number': 1,
  41. 'episode': 'Дело Гольдберга (1 часть)',
  42. 'episode_number': 1,
  43. 'duration': 2655,
  44. 'thumbnail': 're:^https?://.*\.jpg$',
  45. },
  46. 'skip': 'Only works from Russia',
  47. },
  48. {
  49. # with MP4-HD720 format
  50. 'url': 'http://www.ivi.ru/watch/146500',
  51. 'md5': 'd63d35cdbfa1ea61a5eafec7cc523e1e',
  52. 'info_dict': {
  53. 'id': '146500',
  54. 'ext': 'mp4',
  55. 'title': 'Кукла',
  56. 'description': 'md5:ffca9372399976a2d260a407cc74cce6',
  57. 'duration': 5599,
  58. 'thumbnail': 're:^https?://.*\.jpg$',
  59. },
  60. 'skip': 'Only works from Russia',
  61. }
  62. ]
  63. # Sorted by quality
  64. _KNOWN_FORMATS = (
  65. 'MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi',
  66. 'MP4-SHQ', 'MP4-HD720', 'MP4-HD1080')
  67. def _real_extract(self, url):
  68. video_id = self._match_id(url)
  69. data = {
  70. 'method': 'da.content.get',
  71. 'params': [
  72. video_id, {
  73. 'site': 's183',
  74. 'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
  75. 'contentid': video_id
  76. }
  77. ]
  78. }
  79. video_json = self._download_json(
  80. 'http://api.digitalaccess.ru/api/json/', video_id,
  81. 'Downloading video JSON', data=json.dumps(data))
  82. if 'error' in video_json:
  83. error = video_json['error']
  84. if error['origin'] == 'NoRedisValidData':
  85. raise ExtractorError('Video %s does not exist' % video_id, expected=True)
  86. raise ExtractorError(
  87. 'Unable to download video %s: %s' % (video_id, error['message']),
  88. expected=True)
  89. result = video_json['result']
  90. quality = qualities(self._KNOWN_FORMATS)
  91. formats = [{
  92. 'url': x['url'],
  93. 'format_id': x.get('content_format'),
  94. 'quality': quality(x.get('content_format')),
  95. } for x in result['files'] if x.get('url')]
  96. self._sort_formats(formats)
  97. title = result['title']
  98. duration = int_or_none(result.get('duration'))
  99. compilation = result.get('compilation')
  100. episode = title if compilation else None
  101. title = '%s - %s' % (compilation, title) if compilation is not None else title
  102. thumbnails = [{
  103. 'url': preview['url'],
  104. 'id': preview.get('content_format'),
  105. } for preview in result.get('preview', []) if preview.get('url')]
  106. webpage = self._download_webpage(url, video_id)
  107. season = self._search_regex(
  108. r'<li[^>]+class="season active"[^>]*><a[^>]+>([^<]+)',
  109. webpage, 'season', default=None)
  110. season_number = int_or_none(self._search_regex(
  111. r'<li[^>]+class="season active"[^>]*><a[^>]+data-season(?:-index)?="(\d+)"',
  112. webpage, 'season number', default=None))
  113. episode_number = int_or_none(self._search_regex(
  114. r'[^>]+itemprop="episode"[^>]*>\s*<meta[^>]+itemprop="episodeNumber"[^>]+content="(\d+)',
  115. webpage, 'episode number', default=None))
  116. description = self._og_search_description(webpage, default=None) or self._html_search_meta(
  117. 'description', webpage, 'description', default=None)
  118. return {
  119. 'id': video_id,
  120. 'title': title,
  121. 'series': compilation,
  122. 'season': season,
  123. 'season_number': season_number,
  124. 'episode': episode,
  125. 'episode_number': episode_number,
  126. 'thumbnails': thumbnails,
  127. 'description': description,
  128. 'duration': duration,
  129. 'formats': formats,
  130. }
  131. class IviCompilationIE(InfoExtractor):
  132. IE_DESC = 'ivi.ru compilations'
  133. IE_NAME = 'ivi:compilation'
  134. _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
  135. _TESTS = [{
  136. 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa',
  137. 'info_dict': {
  138. 'id': 'dvoe_iz_lartsa',
  139. 'title': 'Двое из ларца (2006 - 2008)',
  140. },
  141. 'playlist_mincount': 24,
  142. }, {
  143. 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/season1',
  144. 'info_dict': {
  145. 'id': 'dvoe_iz_lartsa/season1',
  146. 'title': 'Двое из ларца (2006 - 2008) 1 сезон',
  147. },
  148. 'playlist_mincount': 12,
  149. }]
  150. def _extract_entries(self, html, compilation_id):
  151. return [
  152. self.url_result(
  153. 'http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), IviIE.ie_key())
  154. for serie in re.findall(
  155. r'<a href="/watch/%s/(\d+)"[^>]+data-id="\1"' % compilation_id, html)]
  156. def _real_extract(self, url):
  157. mobj = re.match(self._VALID_URL, url)
  158. compilation_id = mobj.group('compilationid')
  159. season_id = mobj.group('seasonid')
  160. if season_id is not None: # Season link
  161. season_page = self._download_webpage(
  162. url, compilation_id, 'Downloading season %s web page' % season_id)
  163. playlist_id = '%s/season%s' % (compilation_id, season_id)
  164. playlist_title = self._html_search_meta('title', season_page, 'title')
  165. entries = self._extract_entries(season_page, compilation_id)
  166. else: # Compilation link
  167. compilation_page = self._download_webpage(url, compilation_id, 'Downloading compilation web page')
  168. playlist_id = compilation_id
  169. playlist_title = self._html_search_meta('title', compilation_page, 'title')
  170. seasons = re.findall(
  171. r'<a href="/watch/%s/season(\d+)' % compilation_id, compilation_page)
  172. if not seasons: # No seasons in this compilation
  173. entries = self._extract_entries(compilation_page, compilation_id)
  174. else:
  175. entries = []
  176. for season_id in seasons:
  177. season_page = self._download_webpage(
  178. 'http://www.ivi.ru/watch/%s/season%s' % (compilation_id, season_id),
  179. compilation_id, 'Downloading season %s web page' % season_id)
  180. entries.extend(self._extract_entries(season_page, compilation_id))
  181. return self.playlist_result(entries, playlist_id, playlist_title)