You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

220 lines
8.1 KiB

11 years ago
11 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
11 years ago
10 years ago
10 years ago
10 years ago
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. import json
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. ExtractorError,
  8. int_or_none,
  9. qualities,
  10. )
  class IviIE(InfoExtractor):
      """Extractor for a single ivi.ru / ivi.tv video (a movie or one episode)."""

      IE_DESC = 'ivi.ru'
      IE_NAME = 'ivi'
      _VALID_URL = r'https?://(?:www\.)?ivi\.(?:ru|tv)/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
      _GEO_BYPASS = False
      _GEO_COUNTRIES = ['RU']

      _TESTS = [
          # Single movie
          {
              'url': 'http://www.ivi.ru/watch/53141',
              'md5': '6ff5be2254e796ed346251d117196cf4',
              'info_dict': {
                  'id': '53141',
                  'ext': 'mp4',
                  'title': 'Иван Васильевич меняет профессию',
                  'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
                  'duration': 5498,
                  'thumbnail': r're:^https?://.*\.jpg$',
              },
              'skip': 'Only works from Russia',
          },
          # Episode of a series
          {
              'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/9549',
              'md5': '221f56b35e3ed815fde2df71032f4b3e',
              'info_dict': {
                  'id': '9549',
                  'ext': 'mp4',
                  'title': 'Двое из ларца - Дело Гольдберга (1 часть)',
                  'series': 'Двое из ларца',
                  'season': 'Сезон 1',
                  'season_number': 1,
                  'episode': 'Дело Гольдберга (1 часть)',
                  'episode_number': 1,
                  'duration': 2655,
                  'thumbnail': r're:^https?://.*\.jpg$',
              },
              'skip': 'Only works from Russia',
          },
          {
              # with MP4-HD720 format
              'url': 'http://www.ivi.ru/watch/146500',
              'md5': 'd63d35cdbfa1ea61a5eafec7cc523e1e',
              'info_dict': {
                  'id': '146500',
                  'ext': 'mp4',
                  'title': 'Кукла',
                  'description': 'md5:ffca9372399976a2d260a407cc74cce6',
                  'duration': 5599,
                  'thumbnail': r're:^https?://.*\.jpg$',
              },
              'skip': 'Only works from Russia',
          },
          {
              'url': 'https://www.ivi.tv/watch/33560/',
              'only_matching': True,
          },
      ]

      # Sorted by quality (worst first); the position is used as the format rank
      _KNOWN_FORMATS = (
          'MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi',
          'MP4-SHQ', 'MP4-HD720', 'MP4-HD1080')

      def _real_extract(self, url):
          """Build the info dict for the video referenced by ``url``.

          Metadata and media URLs come from the ``da.content.get`` JSON API
          call; season/episode details and the description are scraped from
          the watch page itself.

          Raises ExtractorError when the API answers with an error object
          (a geo restriction is reported through ``raise_geo_restricted``).
          """
          video_id = self._match_id(url)

          data = {
              'method': 'da.content.get',
              'params': [
                  video_id, {
                      'site': 's183',
                      'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
                      'contentid': video_id
                  }
              ]
          }

          # The POST body must be bytes: urllib on Python 3 rejects text
          # strings, and json.dumps() returns text there.
          video_json = self._download_json(
              'http://api.digitalaccess.ru/api/json/', video_id,
              'Downloading video JSON',
              data=json.dumps(data).encode('utf-8'))

          if 'error' in video_json:
              error = video_json['error']
              # Use .get() so that an incomplete error object still ends up
              # as a readable ExtractorError instead of a bare KeyError.
              origin = error.get('origin')
              message = error.get('message')
              if origin == 'NotAllowedForLocation':
                  self.raise_geo_restricted(
                      msg=message, countries=self._GEO_COUNTRIES)
              elif origin == 'NoRedisValidData':
                  raise ExtractorError('Video %s does not exist' % video_id, expected=True)
              raise ExtractorError(
                  'Unable to download video %s: %s' % (video_id, message),
                  expected=True)

          result = video_json['result']

          quality = qualities(self._KNOWN_FORMATS)

          # Entries without a URL cannot be downloaded, so they are dropped
          formats = [{
              'url': x['url'],
              'format_id': x.get('content_format'),
              'quality': quality(x.get('content_format')),
          } for x in result['files'] if x.get('url')]

          self._sort_formats(formats)

          title = result['title']

          duration = int_or_none(result.get('duration'))
          compilation = result.get('compilation')
          # When the video belongs to a compilation, the API title is the
          # episode name and the full title is prefixed with the series name.
          # The same truthiness test is used for both fields so an empty
          # compilation name never produces a ' - <title>' title.
          episode = title if compilation else None
          title = '%s - %s' % (compilation, title) if compilation else title

          # 'preview' may be absent or null; treat both as "no thumbnails"
          thumbnails = [{
              'url': preview['url'],
              'id': preview.get('content_format'),
          } for preview in (result.get('preview') or []) if preview.get('url')]

          webpage = self._download_webpage(url, video_id)

          # All page-scraped fields are optional (default=None)
          season = self._search_regex(
              r'<li[^>]+class="season active"[^>]*><a[^>]+>([^<]+)',
              webpage, 'season', default=None)
          season_number = int_or_none(self._search_regex(
              r'<li[^>]+class="season active"[^>]*><a[^>]+data-season(?:-index)?="(\d+)"',
              webpage, 'season number', default=None))

          episode_number = int_or_none(self._search_regex(
              r'[^>]+itemprop="episode"[^>]*>\s*<meta[^>]+itemprop="episodeNumber"[^>]+content="(\d+)',
              webpage, 'episode number', default=None))

          # Prefer the OpenGraph description, fall back to the plain meta tag
          description = self._og_search_description(webpage, default=None) or self._html_search_meta(
              'description', webpage, 'description', default=None)

          return {
              'id': video_id,
              'title': title,
              'series': compilation,
              'season': season,
              'season_number': season_number,
              'episode': episode,
              'episode_number': episode_number,
              'thumbnails': thumbnails,
              'description': description,
              'duration': duration,
              'formats': formats,
          }
  class IviCompilationIE(InfoExtractor):
      """Playlist extractor for ivi.ru compilations and their individual seasons."""

      IE_DESC = 'ivi.ru compilations'
      IE_NAME = 'ivi:compilation'
      _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
      _TESTS = [{
          'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa',
          'info_dict': {
              'id': 'dvoe_iz_lartsa',
              'title': 'Двое из ларца (2006 - 2008)',
          },
          'playlist_mincount': 24,
      }, {
          'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/season1',
          'info_dict': {
              'id': 'dvoe_iz_lartsa/season1',
              'title': 'Двое из ларца (2006 - 2008) 1 сезон',
          },
          'playlist_mincount': 12,
      }]

      def _extract_entries(self, html, compilation_id):
          """Return one url_result per episode link of the compilation found in ``html``."""
          episode_ids = re.findall(
              r'<a href="/watch/%s/(\d+)"[^>]+data-id="\1"' % compilation_id, html)
          results = []
          for serie in episode_ids:
              episode_url = 'http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie)
              results.append(self.url_result(episode_url, IviIE.ie_key()))
          return results

      def _real_extract(self, url):
          """Return a playlist for a whole compilation or for one of its seasons."""
          compilation_id, season_id = re.match(self._VALID_URL, url).group(
              'compilationid', 'seasonid')

          # Season link: everything needed is on the season page itself
          if season_id is not None:
              page = self._download_webpage(
                  url, compilation_id, 'Downloading season %s web page' % season_id)
              season_playlist_id = '%s/season%s' % (compilation_id, season_id)
              season_title = self._html_search_meta('title', page, 'title')
              return self.playlist_result(
                  self._extract_entries(page, compilation_id),
                  season_playlist_id, season_title)

          # Compilation link
          page = self._download_webpage(url, compilation_id, 'Downloading compilation web page')
          playlist_title = self._html_search_meta('title', page, 'title')
          season_numbers = re.findall(
              r'<a href="/watch/%s/season(\d+)' % compilation_id, page)

          # No seasons in this compilation: episodes are listed on the page itself
          if not season_numbers:
              return self.playlist_result(
                  self._extract_entries(page, compilation_id),
                  compilation_id, playlist_title)

          # Otherwise visit every linked season page and concatenate its episodes
          entries = []
          for number in season_numbers:
              season_url = 'http://www.ivi.ru/watch/%s/season%s' % (compilation_id, number)
              season_html = self._download_webpage(
                  season_url, compilation_id, 'Downloading season %s web page' % number)
              entries += self._extract_entries(season_html, compilation_id)

          return self.playlist_result(entries, compilation_id, playlist_title)