You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

182 lines
7.1 KiB

10 years ago
11 years ago
11 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
11 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. # encoding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. import json
  5. from .common import InfoExtractor
  6. from ..compat import (
  7. compat_urllib_request,
  8. )
  9. from ..utils import (
  10. ExtractorError,
  11. )
  12. class IviIE(InfoExtractor):
  13. IE_DESC = 'ivi.ru'
  14. IE_NAME = 'ivi'
  15. _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
  16. _TESTS = [
  17. # Single movie
  18. {
  19. 'url': 'http://www.ivi.ru/watch/53141',
  20. 'md5': '6ff5be2254e796ed346251d117196cf4',
  21. 'info_dict': {
  22. 'id': '53141',
  23. 'ext': 'mp4',
  24. 'title': 'Иван Васильевич меняет профессию',
  25. 'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
  26. 'duration': 5498,
  27. 'thumbnail': 'http://thumbs.ivi.ru/f20.vcp.digitalaccess.ru/contents/d/1/c3c885163a082c29bceeb7b5a267a6.jpg',
  28. },
  29. 'skip': 'Only works from Russia',
  30. },
  31. # Serial's serie
  32. {
  33. 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/9549',
  34. 'md5': '221f56b35e3ed815fde2df71032f4b3e',
  35. 'info_dict': {
  36. 'id': '9549',
  37. 'ext': 'mp4',
  38. 'title': 'Двое из ларца - Серия 1',
  39. 'duration': 2655,
  40. 'thumbnail': 'http://thumbs.ivi.ru/f15.vcp.digitalaccess.ru/contents/8/4/0068dc0677041f3336b7c2baad8fc0.jpg',
  41. },
  42. 'skip': 'Only works from Russia',
  43. }
  44. ]
  45. # Sorted by quality
  46. _known_formats = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ']
  47. # Sorted by size
  48. _known_thumbnails = ['Thumb-120x90', 'Thumb-160', 'Thumb-640x480']
  49. def _extract_description(self, html):
  50. m = re.search(r'<meta name="description" content="(?P<description>[^"]+)"/>', html)
  51. return m.group('description') if m is not None else None
  52. def _extract_comment_count(self, html):
  53. m = re.search('(?s)<a href="#" id="view-comments" class="action-button dim gradient">\s*Комментарии:\s*(?P<commentcount>\d+)\s*</a>', html)
  54. return int(m.group('commentcount')) if m is not None else 0
  55. def _real_extract(self, url):
  56. video_id = self._match_id(url)
  57. api_url = 'http://api.digitalaccess.ru/api/json/'
  58. data = {
  59. 'method': 'da.content.get',
  60. 'params': [
  61. video_id, {
  62. 'site': 's183',
  63. 'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
  64. 'contentid': video_id
  65. }
  66. ]
  67. }
  68. request = compat_urllib_request.Request(api_url, json.dumps(data))
  69. video_json_page = self._download_webpage(
  70. request, video_id, 'Downloading video JSON')
  71. video_json = json.loads(video_json_page)
  72. if 'error' in video_json:
  73. error = video_json['error']
  74. if error['origin'] == 'NoRedisValidData':
  75. raise ExtractorError('Video %s does not exist' % video_id, expected=True)
  76. raise ExtractorError(
  77. 'Unable to download video %s: %s' % (video_id, error['message']),
  78. expected=True)
  79. result = video_json['result']
  80. formats = [{
  81. 'url': x['url'],
  82. 'format_id': x['content_format'],
  83. 'preference': self._known_formats.index(x['content_format']),
  84. } for x in result['files'] if x['content_format'] in self._known_formats]
  85. self._sort_formats(formats)
  86. if not formats:
  87. raise ExtractorError('No media links available for %s' % video_id)
  88. duration = result['duration']
  89. compilation = result['compilation']
  90. title = result['title']
  91. title = '%s - %s' % (compilation, title) if compilation is not None else title
  92. previews = result['preview']
  93. previews.sort(key=lambda fmt: self._known_thumbnails.index(fmt['content_format']))
  94. thumbnail = previews[-1]['url'] if len(previews) > 0 else None
  95. video_page = self._download_webpage(url, video_id, 'Downloading video page')
  96. description = self._extract_description(video_page)
  97. comment_count = self._extract_comment_count(video_page)
  98. return {
  99. 'id': video_id,
  100. 'title': title,
  101. 'thumbnail': thumbnail,
  102. 'description': description,
  103. 'duration': duration,
  104. 'comment_count': comment_count,
  105. 'formats': formats,
  106. }
  107. class IviCompilationIE(InfoExtractor):
  108. IE_DESC = 'ivi.ru compilations'
  109. IE_NAME = 'ivi:compilation'
  110. _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
  111. _TESTS = [{
  112. 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa',
  113. 'info_dict': {
  114. 'id': 'dvoe_iz_lartsa',
  115. 'title': 'Двое из ларца (2006 - 2008)',
  116. },
  117. 'playlist_mincount': 24,
  118. }, {
  119. 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/season1',
  120. 'info_dict': {
  121. 'id': 'dvoe_iz_lartsa/season1',
  122. 'title': 'Двое из ларца (2006 - 2008) 1 сезон',
  123. },
  124. 'playlist_mincount': 12,
  125. }]
  126. def _extract_entries(self, html, compilation_id):
  127. return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
  128. for serie in re.findall(r'<strong><a href="/watch/%s/(\d+)">(?:[^<]+)</a></strong>' % compilation_id, html)]
  129. def _real_extract(self, url):
  130. mobj = re.match(self._VALID_URL, url)
  131. compilation_id = mobj.group('compilationid')
  132. season_id = mobj.group('seasonid')
  133. if season_id is not None: # Season link
  134. season_page = self._download_webpage(url, compilation_id, 'Downloading season %s web page' % season_id)
  135. playlist_id = '%s/season%s' % (compilation_id, season_id)
  136. playlist_title = self._html_search_meta('title', season_page, 'title')
  137. entries = self._extract_entries(season_page, compilation_id)
  138. else: # Compilation link
  139. compilation_page = self._download_webpage(url, compilation_id, 'Downloading compilation web page')
  140. playlist_id = compilation_id
  141. playlist_title = self._html_search_meta('title', compilation_page, 'title')
  142. seasons = re.findall(r'<a href="/watch/%s/season(\d+)">[^<]+</a>' % compilation_id, compilation_page)
  143. if len(seasons) == 0: # No seasons in this compilation
  144. entries = self._extract_entries(compilation_page, compilation_id)
  145. else:
  146. entries = []
  147. for season_id in seasons:
  148. season_page = self._download_webpage(
  149. 'http://www.ivi.ru/watch/%s/season%s' % (compilation_id, season_id),
  150. compilation_id, 'Downloading season %s web page' % season_id)
  151. entries.extend(self._extract_entries(season_page, compilation_id))
  152. return self.playlist_result(entries, playlist_id, playlist_title)