You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

216 lines
8.0 KiB

10 years ago
11 years ago
11 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
11 years ago
10 years ago
10 years ago
10 years ago
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. import json
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. ExtractorError,
  8. int_or_none,
  9. qualities,
  10. )
  11. class IviIE(InfoExtractor):
  12. IE_DESC = 'ivi.ru'
  13. IE_NAME = 'ivi'
  14. _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
  15. _GEO_BYPASS = False
  16. _GEO_COUNTRIES = ['RU']
  17. _TESTS = [
  18. # Single movie
  19. {
  20. 'url': 'http://www.ivi.ru/watch/53141',
  21. 'md5': '6ff5be2254e796ed346251d117196cf4',
  22. 'info_dict': {
  23. 'id': '53141',
  24. 'ext': 'mp4',
  25. 'title': 'Иван Васильевич меняет профессию',
  26. 'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
  27. 'duration': 5498,
  28. 'thumbnail': r're:^https?://.*\.jpg$',
  29. },
  30. 'skip': 'Only works from Russia',
  31. },
  32. # Serial's series
  33. {
  34. 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/9549',
  35. 'md5': '221f56b35e3ed815fde2df71032f4b3e',
  36. 'info_dict': {
  37. 'id': '9549',
  38. 'ext': 'mp4',
  39. 'title': 'Двое из ларца - Дело Гольдберга (1 часть)',
  40. 'series': 'Двое из ларца',
  41. 'season': 'Сезон 1',
  42. 'season_number': 1,
  43. 'episode': 'Дело Гольдберга (1 часть)',
  44. 'episode_number': 1,
  45. 'duration': 2655,
  46. 'thumbnail': r're:^https?://.*\.jpg$',
  47. },
  48. 'skip': 'Only works from Russia',
  49. },
  50. {
  51. # with MP4-HD720 format
  52. 'url': 'http://www.ivi.ru/watch/146500',
  53. 'md5': 'd63d35cdbfa1ea61a5eafec7cc523e1e',
  54. 'info_dict': {
  55. 'id': '146500',
  56. 'ext': 'mp4',
  57. 'title': 'Кукла',
  58. 'description': 'md5:ffca9372399976a2d260a407cc74cce6',
  59. 'duration': 5599,
  60. 'thumbnail': r're:^https?://.*\.jpg$',
  61. },
  62. 'skip': 'Only works from Russia',
  63. }
  64. ]
  65. # Sorted by quality
  66. _KNOWN_FORMATS = (
  67. 'MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi',
  68. 'MP4-SHQ', 'MP4-HD720', 'MP4-HD1080')
  69. def _real_extract(self, url):
  70. video_id = self._match_id(url)
  71. data = {
  72. 'method': 'da.content.get',
  73. 'params': [
  74. video_id, {
  75. 'site': 's183',
  76. 'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
  77. 'contentid': video_id
  78. }
  79. ]
  80. }
  81. video_json = self._download_json(
  82. 'http://api.digitalaccess.ru/api/json/', video_id,
  83. 'Downloading video JSON', data=json.dumps(data))
  84. if 'error' in video_json:
  85. error = video_json['error']
  86. origin = error['origin']
  87. if origin == 'NotAllowedForLocation':
  88. self.raise_geo_restricted(
  89. msg=error['message'], countries=self._GEO_COUNTRIES)
  90. elif origin == 'NoRedisValidData':
  91. raise ExtractorError('Video %s does not exist' % video_id, expected=True)
  92. raise ExtractorError(
  93. 'Unable to download video %s: %s' % (video_id, error['message']),
  94. expected=True)
  95. result = video_json['result']
  96. quality = qualities(self._KNOWN_FORMATS)
  97. formats = [{
  98. 'url': x['url'],
  99. 'format_id': x.get('content_format'),
  100. 'quality': quality(x.get('content_format')),
  101. } for x in result['files'] if x.get('url')]
  102. self._sort_formats(formats)
  103. title = result['title']
  104. duration = int_or_none(result.get('duration'))
  105. compilation = result.get('compilation')
  106. episode = title if compilation else None
  107. title = '%s - %s' % (compilation, title) if compilation is not None else title
  108. thumbnails = [{
  109. 'url': preview['url'],
  110. 'id': preview.get('content_format'),
  111. } for preview in result.get('preview', []) if preview.get('url')]
  112. webpage = self._download_webpage(url, video_id)
  113. season = self._search_regex(
  114. r'<li[^>]+class="season active"[^>]*><a[^>]+>([^<]+)',
  115. webpage, 'season', default=None)
  116. season_number = int_or_none(self._search_regex(
  117. r'<li[^>]+class="season active"[^>]*><a[^>]+data-season(?:-index)?="(\d+)"',
  118. webpage, 'season number', default=None))
  119. episode_number = int_or_none(self._search_regex(
  120. r'[^>]+itemprop="episode"[^>]*>\s*<meta[^>]+itemprop="episodeNumber"[^>]+content="(\d+)',
  121. webpage, 'episode number', default=None))
  122. description = self._og_search_description(webpage, default=None) or self._html_search_meta(
  123. 'description', webpage, 'description', default=None)
  124. return {
  125. 'id': video_id,
  126. 'title': title,
  127. 'series': compilation,
  128. 'season': season,
  129. 'season_number': season_number,
  130. 'episode': episode,
  131. 'episode_number': episode_number,
  132. 'thumbnails': thumbnails,
  133. 'description': description,
  134. 'duration': duration,
  135. 'formats': formats,
  136. }
  137. class IviCompilationIE(InfoExtractor):
  138. IE_DESC = 'ivi.ru compilations'
  139. IE_NAME = 'ivi:compilation'
  140. _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
  141. _TESTS = [{
  142. 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa',
  143. 'info_dict': {
  144. 'id': 'dvoe_iz_lartsa',
  145. 'title': 'Двое из ларца (2006 - 2008)',
  146. },
  147. 'playlist_mincount': 24,
  148. }, {
  149. 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/season1',
  150. 'info_dict': {
  151. 'id': 'dvoe_iz_lartsa/season1',
  152. 'title': 'Двое из ларца (2006 - 2008) 1 сезон',
  153. },
  154. 'playlist_mincount': 12,
  155. }]
  156. def _extract_entries(self, html, compilation_id):
  157. return [
  158. self.url_result(
  159. 'http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), IviIE.ie_key())
  160. for serie in re.findall(
  161. r'<a href="/watch/%s/(\d+)"[^>]+data-id="\1"' % compilation_id, html)]
  162. def _real_extract(self, url):
  163. mobj = re.match(self._VALID_URL, url)
  164. compilation_id = mobj.group('compilationid')
  165. season_id = mobj.group('seasonid')
  166. if season_id is not None: # Season link
  167. season_page = self._download_webpage(
  168. url, compilation_id, 'Downloading season %s web page' % season_id)
  169. playlist_id = '%s/season%s' % (compilation_id, season_id)
  170. playlist_title = self._html_search_meta('title', season_page, 'title')
  171. entries = self._extract_entries(season_page, compilation_id)
  172. else: # Compilation link
  173. compilation_page = self._download_webpage(url, compilation_id, 'Downloading compilation web page')
  174. playlist_id = compilation_id
  175. playlist_title = self._html_search_meta('title', compilation_page, 'title')
  176. seasons = re.findall(
  177. r'<a href="/watch/%s/season(\d+)' % compilation_id, compilation_page)
  178. if not seasons: # No seasons in this compilation
  179. entries = self._extract_entries(compilation_page, compilation_id)
  180. else:
  181. entries = []
  182. for season_id in seasons:
  183. season_page = self._download_webpage(
  184. 'http://www.ivi.ru/watch/%s/season%s' % (compilation_id, season_id),
  185. compilation_id, 'Downloading season %s web page' % season_id)
  186. entries.extend(self._extract_entries(season_page, compilation_id))
  187. return self.playlist_result(entries, playlist_id, playlist_title)