You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

193 lines
7.3 KiB

Switch codebase to use sanitized_Request instead of compat_urllib_request.Request [downloader/dash] Use sanitized_Request [downloader/http] Use sanitized_Request [atresplayer] Use sanitized_Request [bambuser] Use sanitized_Request [bliptv] Use sanitized_Request [brightcove] Use sanitized_Request [cbs] Use sanitized_Request [ceskatelevize] Use sanitized_Request [collegerama] Use sanitized_Request [extractor/common] Use sanitized_Request [crunchyroll] Use sanitized_Request [dailymotion] Use sanitized_Request [dcn] Use sanitized_Request [dramafever] Use sanitized_Request [dumpert] Use sanitized_Request [eitb] Use sanitized_Request [escapist] Use sanitized_Request [everyonesmixtape] Use sanitized_Request [extremetube] Use sanitized_Request [facebook] Use sanitized_Request [fc2] Use sanitized_Request [flickr] Use sanitized_Request [4tube] Use sanitized_Request [gdcvault] Use sanitized_Request [extractor/generic] Use sanitized_Request [hearthisat] Use sanitized_Request [hotnewhiphop] Use sanitized_Request [hypem] Use sanitized_Request [iprima] Use sanitized_Request [ivi] Use sanitized_Request [keezmovies] Use sanitized_Request [letv] Use sanitized_Request [lynda] Use sanitized_Request [metacafe] Use sanitized_Request [minhateca] Use sanitized_Request [miomio] Use sanitized_Request [meovideo] Use sanitized_Request [mofosex] Use sanitized_Request [moniker] Use sanitized_Request [mooshare] Use sanitized_Request [movieclips] Use sanitized_Request [mtv] Use sanitized_Request [myvideo] Use sanitized_Request [neteasemusic] Use sanitized_Request [nfb] Use sanitized_Request [niconico] Use sanitized_Request [noco] Use sanitized_Request [nosvideo] Use sanitized_Request [novamov] Use sanitized_Request [nowness] Use sanitized_Request [nuvid] Use sanitized_Request [played] Use sanitized_Request [pluralsight] Use sanitized_Request [pornhub] Use sanitized_Request [pornotube] Use sanitized_Request [primesharetv] Use sanitized_Request [promptfile] Use sanitized_Request [qqmusic] Use 
sanitized_Request [rtve] Use sanitized_Request [safari] Use sanitized_Request [sandia] Use sanitized_Request [shared] Use sanitized_Request [sharesix] Use sanitized_Request [sina] Use sanitized_Request [smotri] Use sanitized_Request [sohu] Use sanitized_Request [spankwire] Use sanitized_Request [sportdeutschland] Use sanitized_Request [streamcloud] Use sanitized_Request [streamcz] Use sanitized_Request [tapely] Use sanitized_Request [tube8] Use sanitized_Request [tubitv] Use sanitized_Request [twitch] Use sanitized_Request [twitter] Use sanitized_Request [udemy] Use sanitized_Request [vbox7] Use sanitized_Request [veoh] Use sanitized_Request [vessel] Use sanitized_Request [vevo] Use sanitized_Request [viddler] Use sanitized_Request [videomega] Use sanitized_Request [viewvster] Use sanitized_Request [viki] Use sanitized_Request [vk] Use sanitized_Request [vodlocker] Use sanitized_Request [voicerepublic] Use sanitized_Request [wistia] Use sanitized_Request [xfileshare] Use sanitized_Request [xtube] Use sanitized_Request [xvideos] Use sanitized_Request [yandexmusic] Use sanitized_Request [youku] Use sanitized_Request [youporn] Use sanitized_Request [youtube] Use sanitized_Request [patreon] Use sanitized_Request [extractor/common] Remove unused import [nfb] PEP 8
9 years ago
10 years ago
11 years ago
11 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
11 years ago
10 years ago
10 years ago
10 years ago
  1. # encoding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. import json
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. ExtractorError,
  8. int_or_none,
  9. sanitized_Request,
  10. )
  11. class IviIE(InfoExtractor):
  12. IE_DESC = 'ivi.ru'
  13. IE_NAME = 'ivi'
  14. _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
  15. _TESTS = [
  16. # Single movie
  17. {
  18. 'url': 'http://www.ivi.ru/watch/53141',
  19. 'md5': '6ff5be2254e796ed346251d117196cf4',
  20. 'info_dict': {
  21. 'id': '53141',
  22. 'ext': 'mp4',
  23. 'title': 'Иван Васильевич меняет профессию',
  24. 'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
  25. 'duration': 5498,
  26. 'thumbnail': 're:^https?://.*\.jpg$',
  27. },
  28. 'skip': 'Only works from Russia',
  29. },
  30. # Serial's series
  31. {
  32. 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/9549',
  33. 'md5': '221f56b35e3ed815fde2df71032f4b3e',
  34. 'info_dict': {
  35. 'id': '9549',
  36. 'ext': 'mp4',
  37. 'title': 'Двое из ларца - Дело Гольдберга (1 часть)',
  38. 'series': 'Двое из ларца',
  39. 'season': 'Сезон 1',
  40. 'season_number': 1,
  41. 'episode': 'Дело Гольдберга (1 часть)',
  42. 'episode_number': 1,
  43. 'duration': 2655,
  44. 'thumbnail': 're:^https?://.*\.jpg$',
  45. },
  46. 'skip': 'Only works from Russia',
  47. }
  48. ]
  49. # Sorted by quality
  50. _KNOWN_FORMATS = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ']
  51. def _real_extract(self, url):
  52. video_id = self._match_id(url)
  53. data = {
  54. 'method': 'da.content.get',
  55. 'params': [
  56. video_id, {
  57. 'site': 's183',
  58. 'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
  59. 'contentid': video_id
  60. }
  61. ]
  62. }
  63. request = sanitized_Request(
  64. 'http://api.digitalaccess.ru/api/json/', json.dumps(data))
  65. video_json = self._download_json(
  66. request, video_id, 'Downloading video JSON')
  67. if 'error' in video_json:
  68. error = video_json['error']
  69. if error['origin'] == 'NoRedisValidData':
  70. raise ExtractorError('Video %s does not exist' % video_id, expected=True)
  71. raise ExtractorError(
  72. 'Unable to download video %s: %s' % (video_id, error['message']),
  73. expected=True)
  74. result = video_json['result']
  75. formats = [{
  76. 'url': x['url'],
  77. 'format_id': x['content_format'],
  78. 'preference': self._KNOWN_FORMATS.index(x['content_format']),
  79. } for x in result['files'] if x['content_format'] in self._KNOWN_FORMATS]
  80. self._sort_formats(formats)
  81. title = result['title']
  82. duration = int_or_none(result.get('duration'))
  83. compilation = result.get('compilation')
  84. episode = title if compilation else None
  85. title = '%s - %s' % (compilation, title) if compilation is not None else title
  86. thumbnails = [{
  87. 'url': preview['url'],
  88. 'id': preview.get('content_format'),
  89. } for preview in result.get('preview', []) if preview.get('url')]
  90. webpage = self._download_webpage(url, video_id)
  91. season = self._search_regex(
  92. r'<li[^>]+class="season active"[^>]*><a[^>]+>([^<]+)',
  93. webpage, 'season', default=None)
  94. season_number = int_or_none(self._search_regex(
  95. r'<li[^>]+class="season active"[^>]*><a[^>]+data-season(?:-index)?="(\d+)"',
  96. webpage, 'season number', default=None))
  97. episode_number = int_or_none(self._search_regex(
  98. r'<meta[^>]+itemprop="episode"[^>]*>\s*<meta[^>]+itemprop="episodeNumber"[^>]+content="(\d+)',
  99. webpage, 'episode number', default=None))
  100. description = self._og_search_description(webpage, default=None) or self._html_search_meta(
  101. 'description', webpage, 'description', default=None)
  102. return {
  103. 'id': video_id,
  104. 'title': title,
  105. 'series': compilation,
  106. 'season': season,
  107. 'season_number': season_number,
  108. 'episode': episode,
  109. 'episode_number': episode_number,
  110. 'thumbnails': thumbnails,
  111. 'description': description,
  112. 'duration': duration,
  113. 'formats': formats,
  114. }
  115. class IviCompilationIE(InfoExtractor):
  116. IE_DESC = 'ivi.ru compilations'
  117. IE_NAME = 'ivi:compilation'
  118. _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
  119. _TESTS = [{
  120. 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa',
  121. 'info_dict': {
  122. 'id': 'dvoe_iz_lartsa',
  123. 'title': 'Двое из ларца (2006 - 2008)',
  124. },
  125. 'playlist_mincount': 24,
  126. }, {
  127. 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/season1',
  128. 'info_dict': {
  129. 'id': 'dvoe_iz_lartsa/season1',
  130. 'title': 'Двое из ларца (2006 - 2008) 1 сезон',
  131. },
  132. 'playlist_mincount': 12,
  133. }]
  134. def _extract_entries(self, html, compilation_id):
  135. return [
  136. self.url_result(
  137. 'http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), IviIE.ie_key())
  138. for serie in re.findall(
  139. r'<a href="/watch/%s/(\d+)"[^>]+data-id="\1"' % compilation_id, html)]
  140. def _real_extract(self, url):
  141. mobj = re.match(self._VALID_URL, url)
  142. compilation_id = mobj.group('compilationid')
  143. season_id = mobj.group('seasonid')
  144. if season_id is not None: # Season link
  145. season_page = self._download_webpage(
  146. url, compilation_id, 'Downloading season %s web page' % season_id)
  147. playlist_id = '%s/season%s' % (compilation_id, season_id)
  148. playlist_title = self._html_search_meta('title', season_page, 'title')
  149. entries = self._extract_entries(season_page, compilation_id)
  150. else: # Compilation link
  151. compilation_page = self._download_webpage(url, compilation_id, 'Downloading compilation web page')
  152. playlist_id = compilation_id
  153. playlist_title = self._html_search_meta('title', compilation_page, 'title')
  154. seasons = re.findall(
  155. r'<a href="/watch/%s/season(\d+)' % compilation_id, compilation_page)
  156. if not seasons: # No seasons in this compilation
  157. entries = self._extract_entries(compilation_page, compilation_id)
  158. else:
  159. entries = []
  160. for season_id in seasons:
  161. season_page = self._download_webpage(
  162. 'http://www.ivi.ru/watch/%s/season%s' % (compilation_id, season_id),
  163. compilation_id, 'Downloading season %s web page' % season_id)
  164. entries.extend(self._extract_entries(season_page, compilation_id))
  165. return self.playlist_result(entries, playlist_id, playlist_title)