You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

175 lines
7.1 KiB

11 years ago
11 years ago
10 years ago
11 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. # encoding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. import json
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. compat_urllib_request,
  8. ExtractorError,
  9. )
  10. class IviIE(InfoExtractor):
  11. IE_DESC = 'ivi.ru'
  12. IE_NAME = 'ivi'
  13. _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<videoid>\d+)'
  14. _TESTS = [
  15. # Single movie
  16. {
  17. 'url': 'http://www.ivi.ru/watch/53141',
  18. 'md5': '6ff5be2254e796ed346251d117196cf4',
  19. 'info_dict': {
  20. 'id': '53141',
  21. 'ext': 'mp4',
  22. 'title': 'Иван Васильевич меняет профессию',
  23. 'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
  24. 'duration': 5498,
  25. 'thumbnail': 'http://thumbs.ivi.ru/f20.vcp.digitalaccess.ru/contents/d/1/c3c885163a082c29bceeb7b5a267a6.jpg',
  26. },
  27. 'skip': 'Only works from Russia',
  28. },
  29. # Serial's serie
  30. {
  31. 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/9549',
  32. 'md5': '221f56b35e3ed815fde2df71032f4b3e',
  33. 'info_dict': {
  34. 'id': '9549',
  35. 'ext': 'mp4',
  36. 'title': 'Двое из ларца - Серия 1',
  37. 'duration': 2655,
  38. 'thumbnail': 'http://thumbs.ivi.ru/f15.vcp.digitalaccess.ru/contents/8/4/0068dc0677041f3336b7c2baad8fc0.jpg',
  39. },
  40. 'skip': 'Only works from Russia',
  41. }
  42. ]
  43. # Sorted by quality
  44. _known_formats = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ']
  45. # Sorted by size
  46. _known_thumbnails = ['Thumb-120x90', 'Thumb-160', 'Thumb-640x480']
  47. def _extract_description(self, html):
  48. m = re.search(r'<meta name="description" content="(?P<description>[^"]+)"/>', html)
  49. return m.group('description') if m is not None else None
  50. def _extract_comment_count(self, html):
  51. m = re.search('(?s)<a href="#" id="view-comments" class="action-button dim gradient">\s*Комментарии:\s*(?P<commentcount>\d+)\s*</a>', html)
  52. return int(m.group('commentcount')) if m is not None else 0
  53. def _real_extract(self, url):
  54. mobj = re.match(self._VALID_URL, url)
  55. video_id = mobj.group('videoid')
  56. api_url = 'http://api.digitalaccess.ru/api/json/'
  57. data = {'method': 'da.content.get',
  58. 'params': [video_id, {'site': 's183',
  59. 'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
  60. 'contentid': video_id
  61. }
  62. ]
  63. }
  64. request = compat_urllib_request.Request(api_url, json.dumps(data))
  65. video_json_page = self._download_webpage(request, video_id, 'Downloading video JSON')
  66. video_json = json.loads(video_json_page)
  67. if 'error' in video_json:
  68. error = video_json['error']
  69. if error['origin'] == 'NoRedisValidData':
  70. raise ExtractorError('Video %s does not exist' % video_id, expected=True)
  71. raise ExtractorError('Unable to download video %s: %s' % (video_id, error['message']), expected=True)
  72. result = video_json['result']
  73. formats = [{
  74. 'url': x['url'],
  75. 'format_id': x['content_format'],
  76. 'preference': self._known_formats.index(x['content_format']),
  77. } for x in result['files'] if x['content_format'] in self._known_formats]
  78. self._sort_formats(formats)
  79. if not formats:
  80. raise ExtractorError('No media links available for %s' % video_id)
  81. duration = result['duration']
  82. compilation = result['compilation']
  83. title = result['title']
  84. title = '%s - %s' % (compilation, title) if compilation is not None else title
  85. previews = result['preview']
  86. previews.sort(key=lambda fmt: self._known_thumbnails.index(fmt['content_format']))
  87. thumbnail = previews[-1]['url'] if len(previews) > 0 else None
  88. video_page = self._download_webpage(url, video_id, 'Downloading video page')
  89. description = self._extract_description(video_page)
  90. comment_count = self._extract_comment_count(video_page)
  91. return {
  92. 'id': video_id,
  93. 'title': title,
  94. 'thumbnail': thumbnail,
  95. 'description': description,
  96. 'duration': duration,
  97. 'comment_count': comment_count,
  98. 'formats': formats,
  99. }
  100. class IviCompilationIE(InfoExtractor):
  101. IE_DESC = 'ivi.ru compilations'
  102. IE_NAME = 'ivi:compilation'
  103. _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
  104. _TESTS = [{
  105. 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa',
  106. 'info_dict': {
  107. 'id': 'dvoe_iz_lartsa',
  108. 'title': 'Двое из ларца (2006 - 2008)',
  109. },
  110. 'playlist_mincount': 24,
  111. }, {
  112. 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/season1',
  113. 'info_dict': {
  114. 'id': 'dvoe_iz_lartsa/season1',
  115. 'title': 'Двое из ларца (2006 - 2008) 1 сезон',
  116. },
  117. 'playlist_mincount': 12,
  118. }]
  119. def _extract_entries(self, html, compilation_id):
  120. return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
  121. for serie in re.findall(r'<strong><a href="/watch/%s/(\d+)">(?:[^<]+)</a></strong>' % compilation_id, html)]
  122. def _real_extract(self, url):
  123. mobj = re.match(self._VALID_URL, url)
  124. compilation_id = mobj.group('compilationid')
  125. season_id = mobj.group('seasonid')
  126. if season_id is not None: # Season link
  127. season_page = self._download_webpage(url, compilation_id, 'Downloading season %s web page' % season_id)
  128. playlist_id = '%s/season%s' % (compilation_id, season_id)
  129. playlist_title = self._html_search_meta('title', season_page, 'title')
  130. entries = self._extract_entries(season_page, compilation_id)
  131. else: # Compilation link
  132. compilation_page = self._download_webpage(url, compilation_id, 'Downloading compilation web page')
  133. playlist_id = compilation_id
  134. playlist_title = self._html_search_meta('title', compilation_page, 'title')
  135. seasons = re.findall(r'<a href="/watch/%s/season(\d+)">[^<]+</a>' % compilation_id, compilation_page)
  136. if len(seasons) == 0: # No seasons in this compilation
  137. entries = self._extract_entries(compilation_page, compilation_id)
  138. else:
  139. entries = []
  140. for season_id in seasons:
  141. season_page = self._download_webpage(
  142. 'http://www.ivi.ru/watch/%s/season%s' % (compilation_id, season_id),
  143. compilation_id, 'Downloading season %s web page' % season_id)
  144. entries.extend(self._extract_entries(season_page, compilation_id))
  145. return self.playlist_result(entries, playlist_id, playlist_title)