You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

271 lines
10 KiB

11 years ago
11 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
11 years ago
10 years ago
10 years ago
10 years ago
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import json
  4. import re
  5. import sys
  6. from .common import InfoExtractor
  7. from ..utils import (
  8. ExtractorError,
  9. int_or_none,
  10. qualities,
  11. )
  12. class IviIE(InfoExtractor):
  13. IE_DESC = 'ivi.ru'
  14. IE_NAME = 'ivi'
  15. _VALID_URL = r'https?://(?:www\.)?ivi\.(?:ru|tv)/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
  16. _GEO_BYPASS = False
  17. _GEO_COUNTRIES = ['RU']
  18. _LIGHT_KEY = b'\xf1\x02\x32\xb7\xbc\x5c\x7a\xe8\xf7\x96\xc1\x33\x2b\x27\xa1\x8c'
  19. _LIGHT_URL = 'https://api.ivi.ru/light/'
  20. _TESTS = [
  21. # Single movie
  22. {
  23. 'url': 'http://www.ivi.ru/watch/53141',
  24. 'md5': '6ff5be2254e796ed346251d117196cf4',
  25. 'info_dict': {
  26. 'id': '53141',
  27. 'ext': 'mp4',
  28. 'title': 'Иван Васильевич меняет профессию',
  29. 'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
  30. 'duration': 5498,
  31. 'thumbnail': r're:^https?://.*\.jpg$',
  32. },
  33. 'skip': 'Only works from Russia',
  34. },
  35. # Serial's series
  36. {
  37. 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/9549',
  38. 'md5': '221f56b35e3ed815fde2df71032f4b3e',
  39. 'info_dict': {
  40. 'id': '9549',
  41. 'ext': 'mp4',
  42. 'title': 'Двое из ларца - Дело Гольдберга (1 часть)',
  43. 'series': 'Двое из ларца',
  44. 'season': 'Сезон 1',
  45. 'season_number': 1,
  46. 'episode': 'Дело Гольдберга (1 часть)',
  47. 'episode_number': 1,
  48. 'duration': 2655,
  49. 'thumbnail': r're:^https?://.*\.jpg$',
  50. },
  51. 'skip': 'Only works from Russia',
  52. },
  53. {
  54. # with MP4-HD720 format
  55. 'url': 'http://www.ivi.ru/watch/146500',
  56. 'md5': 'd63d35cdbfa1ea61a5eafec7cc523e1e',
  57. 'info_dict': {
  58. 'id': '146500',
  59. 'ext': 'mp4',
  60. 'title': 'Кукла',
  61. 'description': 'md5:ffca9372399976a2d260a407cc74cce6',
  62. 'duration': 5599,
  63. 'thumbnail': r're:^https?://.*\.jpg$',
  64. },
  65. 'skip': 'Only works from Russia',
  66. },
  67. {
  68. 'url': 'https://www.ivi.tv/watch/33560/',
  69. 'only_matching': True,
  70. },
  71. ]
  72. # Sorted by quality
  73. _KNOWN_FORMATS = (
  74. 'MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi',
  75. 'MP4-SHQ', 'MP4-HD720', 'MP4-HD1080')
  76. def _real_extract(self, url):
  77. video_id = self._match_id(url)
  78. data = json.dumps({
  79. 'method': 'da.content.get',
  80. 'params': [
  81. video_id, {
  82. 'site': 's%d',
  83. 'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
  84. 'contentid': video_id
  85. }
  86. ]
  87. })
  88. bundled = hasattr(sys, 'frozen')
  89. for site in (353, 183):
  90. content_data = (data % site).encode()
  91. if site == 353:
  92. if bundled:
  93. continue
  94. try:
  95. from Cryptodome.Cipher import Blowfish
  96. from Cryptodome.Hash import CMAC
  97. pycryptodomex_found = True
  98. except ImportError:
  99. pycryptodomex_found = False
  100. continue
  101. timestamp = (self._download_json(
  102. self._LIGHT_URL, video_id,
  103. 'Downloading timestamp JSON', data=json.dumps({
  104. 'method': 'da.timestamp.get',
  105. 'params': []
  106. }).encode(), fatal=False) or {}).get('result')
  107. if not timestamp:
  108. continue
  109. query = {
  110. 'ts': timestamp,
  111. 'sign': CMAC.new(self._LIGHT_KEY, timestamp.encode() + content_data, Blowfish).hexdigest(),
  112. }
  113. else:
  114. query = {}
  115. video_json = self._download_json(
  116. self._LIGHT_URL, video_id,
  117. 'Downloading video JSON', data=content_data, query=query)
  118. error = video_json.get('error')
  119. if error:
  120. origin = error.get('origin')
  121. message = error.get('message') or error.get('user_message')
  122. extractor_msg = 'Unable to download video %s'
  123. if origin == 'NotAllowedForLocation':
  124. self.raise_geo_restricted(message, self._GEO_COUNTRIES)
  125. elif origin == 'NoRedisValidData':
  126. extractor_msg = 'Video %s does not exist'
  127. elif site == 353:
  128. continue
  129. elif bundled:
  130. raise ExtractorError(
  131. 'This feature does not work from bundled exe. Run youtube-dl from sources.',
  132. expected=True)
  133. elif not pycryptodomex_found:
  134. raise ExtractorError(
  135. 'pycryptodomex not found. Please install it.',
  136. expected=True)
  137. elif message:
  138. extractor_msg += ': ' + message
  139. raise ExtractorError(extractor_msg % video_id, expected=True)
  140. else:
  141. break
  142. result = video_json['result']
  143. title = result['title']
  144. quality = qualities(self._KNOWN_FORMATS)
  145. formats = []
  146. for f in result.get('files', []):
  147. f_url = f.get('url')
  148. content_format = f.get('content_format')
  149. if not f_url or '-MDRM-' in content_format or '-FPS-' in content_format:
  150. continue
  151. formats.append({
  152. 'url': f_url,
  153. 'format_id': content_format,
  154. 'quality': quality(content_format),
  155. 'filesize': int_or_none(f.get('size_in_bytes')),
  156. })
  157. self._sort_formats(formats)
  158. compilation = result.get('compilation')
  159. episode = title if compilation else None
  160. title = '%s - %s' % (compilation, title) if compilation is not None else title
  161. thumbnails = [{
  162. 'url': preview['url'],
  163. 'id': preview.get('content_format'),
  164. } for preview in result.get('preview', []) if preview.get('url')]
  165. webpage = self._download_webpage(url, video_id)
  166. season = self._search_regex(
  167. r'<li[^>]+class="season active"[^>]*><a[^>]+>([^<]+)',
  168. webpage, 'season', default=None)
  169. season_number = int_or_none(self._search_regex(
  170. r'<li[^>]+class="season active"[^>]*><a[^>]+data-season(?:-index)?="(\d+)"',
  171. webpage, 'season number', default=None))
  172. episode_number = int_or_none(self._search_regex(
  173. r'[^>]+itemprop="episode"[^>]*>\s*<meta[^>]+itemprop="episodeNumber"[^>]+content="(\d+)',
  174. webpage, 'episode number', default=None))
  175. description = self._og_search_description(webpage, default=None) or self._html_search_meta(
  176. 'description', webpage, 'description', default=None)
  177. return {
  178. 'id': video_id,
  179. 'title': title,
  180. 'series': compilation,
  181. 'season': season,
  182. 'season_number': season_number,
  183. 'episode': episode,
  184. 'episode_number': episode_number,
  185. 'thumbnails': thumbnails,
  186. 'description': description,
  187. 'duration': int_or_none(result.get('duration')),
  188. 'formats': formats,
  189. }
  190. class IviCompilationIE(InfoExtractor):
  191. IE_DESC = 'ivi.ru compilations'
  192. IE_NAME = 'ivi:compilation'
  193. _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
  194. _TESTS = [{
  195. 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa',
  196. 'info_dict': {
  197. 'id': 'dvoe_iz_lartsa',
  198. 'title': 'Двое из ларца (2006 - 2008)',
  199. },
  200. 'playlist_mincount': 24,
  201. }, {
  202. 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/season1',
  203. 'info_dict': {
  204. 'id': 'dvoe_iz_lartsa/season1',
  205. 'title': 'Двое из ларца (2006 - 2008) 1 сезон',
  206. },
  207. 'playlist_mincount': 12,
  208. }]
  209. def _extract_entries(self, html, compilation_id):
  210. return [
  211. self.url_result(
  212. 'http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), IviIE.ie_key())
  213. for serie in re.findall(
  214. r'<a\b[^>]+\bhref=["\']/watch/%s/(\d+)["\']' % compilation_id, html)]
  215. def _real_extract(self, url):
  216. mobj = re.match(self._VALID_URL, url)
  217. compilation_id = mobj.group('compilationid')
  218. season_id = mobj.group('seasonid')
  219. if season_id is not None: # Season link
  220. season_page = self._download_webpage(
  221. url, compilation_id, 'Downloading season %s web page' % season_id)
  222. playlist_id = '%s/season%s' % (compilation_id, season_id)
  223. playlist_title = self._html_search_meta('title', season_page, 'title')
  224. entries = self._extract_entries(season_page, compilation_id)
  225. else: # Compilation link
  226. compilation_page = self._download_webpage(url, compilation_id, 'Downloading compilation web page')
  227. playlist_id = compilation_id
  228. playlist_title = self._html_search_meta('title', compilation_page, 'title')
  229. seasons = re.findall(
  230. r'<a href="/watch/%s/season(\d+)' % compilation_id, compilation_page)
  231. if not seasons: # No seasons in this compilation
  232. entries = self._extract_entries(compilation_page, compilation_id)
  233. else:
  234. entries = []
  235. for season_id in seasons:
  236. season_page = self._download_webpage(
  237. 'http://www.ivi.ru/watch/%s/season%s' % (compilation_id, season_id),
  238. compilation_id, 'Downloading season %s web page' % season_id)
  239. entries.extend(self._extract_entries(season_page, compilation_id))
  240. return self.playlist_result(entries, playlist_id, playlist_title)