You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

277 lines
11 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import base64
  4. import json
  5. import hashlib
  6. import hmac
  7. import random
  8. import string
  9. import time
  10. from .common import InfoExtractor
  11. from ..compat import (
  12. compat_HTTPError,
  13. compat_urllib_parse_urlencode,
  14. compat_urllib_parse,
  15. )
  16. from ..utils import (
  17. ExtractorError,
  18. float_or_none,
  19. int_or_none,
  20. )
  21. class VRVBaseIE(InfoExtractor):
  22. _API_DOMAIN = None
  23. _API_PARAMS = {}
  24. _CMS_SIGNING = {}
  25. _TOKEN = None
  26. _TOKEN_SECRET = ''
  27. def _call_api(self, path, video_id, note, data=None):
  28. # https://tools.ietf.org/html/rfc5849#section-3
  29. base_url = self._API_DOMAIN + '/core/' + path
  30. query = [
  31. ('oauth_consumer_key', self._API_PARAMS['oAuthKey']),
  32. ('oauth_nonce', ''.join([random.choice(string.ascii_letters) for _ in range(32)])),
  33. ('oauth_signature_method', 'HMAC-SHA1'),
  34. ('oauth_timestamp', int(time.time())),
  35. ]
  36. if self._TOKEN:
  37. query.append(('oauth_token', self._TOKEN))
  38. encoded_query = compat_urllib_parse_urlencode(query)
  39. headers = self.geo_verification_headers()
  40. if data:
  41. data = json.dumps(data).encode()
  42. headers['Content-Type'] = 'application/json'
  43. base_string = '&'.join([
  44. 'POST' if data else 'GET',
  45. compat_urllib_parse.quote(base_url, ''),
  46. compat_urllib_parse.quote(encoded_query, '')])
  47. oauth_signature = base64.b64encode(hmac.new(
  48. (self._API_PARAMS['oAuthSecret'] + '&' + self._TOKEN_SECRET).encode('ascii'),
  49. base_string.encode(), hashlib.sha1).digest()).decode()
  50. encoded_query += '&oauth_signature=' + compat_urllib_parse.quote(oauth_signature, '')
  51. try:
  52. return self._download_json(
  53. '?'.join([base_url, encoded_query]), video_id,
  54. note='Downloading %s JSON metadata' % note, headers=headers, data=data)
  55. except ExtractorError as e:
  56. if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
  57. raise ExtractorError(json.loads(e.cause.read().decode())['message'], expected=True)
  58. raise
  59. def _call_cms(self, path, video_id, note):
  60. if not self._CMS_SIGNING:
  61. index = self._call_api('index', video_id, 'CMS Signing')
  62. self._CMS_SIGNING = index.get('cms_signing') or {}
  63. if not self._CMS_SIGNING:
  64. for signing_policy in index.get('signing_policies', []):
  65. signing_path = signing_policy.get('path')
  66. if signing_path and signing_path.startswith('/cms/'):
  67. name, value = signing_policy.get('name'), signing_policy.get('value')
  68. if name and value:
  69. self._CMS_SIGNING[name] = value
  70. return self._download_json(
  71. self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING,
  72. note='Downloading %s JSON metadata' % note, headers=self.geo_verification_headers())
  73. def _get_cms_resource(self, resource_key, video_id):
  74. return self._call_api(
  75. 'cms_resource', video_id, 'resource path', data={
  76. 'resource_key': resource_key,
  77. })['__links__']['cms_resource']['href']
  78. def _real_initialize(self):
  79. webpage = self._download_webpage(
  80. 'https://vrv.co/', None, headers=self.geo_verification_headers())
  81. self._API_PARAMS = self._parse_json(self._search_regex(
  82. [
  83. r'window\.__APP_CONFIG__\s*=\s*({.+?})(?:</script>|;)',
  84. r'window\.__APP_CONFIG__\s*=\s*({.+})'
  85. ], webpage, 'app config'), None)['cxApiParams']
  86. self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co')
  87. class VRVIE(VRVBaseIE):
  88. IE_NAME = 'vrv'
  89. _VALID_URL = r'https?://(?:www\.)?vrv\.co/watch/(?P<id>[A-Z0-9]+)'
  90. _TESTS = [{
  91. 'url': 'https://vrv.co/watch/GR9PNZ396/Hidden-America-with-Jonah-Ray:BOSTON-WHERE-THE-PAST-IS-THE-PRESENT',
  92. 'info_dict': {
  93. 'id': 'GR9PNZ396',
  94. 'ext': 'mp4',
  95. 'title': 'BOSTON: WHERE THE PAST IS THE PRESENT',
  96. 'description': 'md5:4ec8844ac262ca2df9e67c0983c6b83f',
  97. 'uploader_id': 'seeso',
  98. },
  99. 'params': {
  100. # m3u8 download
  101. 'skip_download': True,
  102. },
  103. }, {
  104. # movie listing
  105. 'url': 'https://vrv.co/watch/G6NQXZ1J6/Lily-CAT',
  106. 'info_dict': {
  107. 'id': 'G6NQXZ1J6',
  108. 'title': 'Lily C.A.T',
  109. 'description': 'md5:988b031e7809a6aeb60968be4af7db07',
  110. },
  111. 'playlist_count': 2,
  112. }]
  113. _NETRC_MACHINE = 'vrv'
  114. def _real_initialize(self):
  115. super(VRVIE, self)._real_initialize()
  116. email, password = self._get_login_info()
  117. if email is None:
  118. return
  119. token_credentials = self._call_api(
  120. 'authenticate/by:credentials', None, 'Token Credentials', data={
  121. 'email': email,
  122. 'password': password,
  123. })
  124. self._TOKEN = token_credentials['oauth_token']
  125. self._TOKEN_SECRET = token_credentials['oauth_token_secret']
  126. def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang):
  127. if not url or stream_format not in ('hls', 'dash', 'adaptive_hls'):
  128. return []
  129. stream_id_list = []
  130. if audio_lang:
  131. stream_id_list.append('audio-%s' % audio_lang)
  132. if hardsub_lang:
  133. stream_id_list.append('hardsub-%s' % hardsub_lang)
  134. format_id = stream_format
  135. if stream_id_list:
  136. format_id += '-' + '-'.join(stream_id_list)
  137. if 'hls' in stream_format:
  138. adaptive_formats = self._extract_m3u8_formats(
  139. url, video_id, 'mp4', m3u8_id=format_id,
  140. note='Downloading %s information' % format_id,
  141. fatal=False)
  142. elif stream_format == 'dash':
  143. adaptive_formats = self._extract_mpd_formats(
  144. url, video_id, mpd_id=format_id,
  145. note='Downloading %s information' % format_id,
  146. fatal=False)
  147. if audio_lang:
  148. for f in adaptive_formats:
  149. if f.get('acodec') != 'none':
  150. f['language'] = audio_lang
  151. return adaptive_formats
  152. def _real_extract(self, url):
  153. video_id = self._match_id(url)
  154. object_data = self._call_cms(self._get_cms_resource(
  155. 'cms:/objects/' + video_id, video_id), video_id, 'object')['items'][0]
  156. resource_path = object_data['__links__']['resource']['href']
  157. video_data = self._call_cms(resource_path, video_id, 'video')
  158. title = video_data['title']
  159. description = video_data.get('description')
  160. if video_data.get('__class__') == 'movie_listing':
  161. items = self._call_cms(
  162. video_data['__links__']['movie_listing/movies']['href'],
  163. video_id, 'movie listing').get('items') or []
  164. if len(items) != 1:
  165. entries = []
  166. for item in items:
  167. item_id = item.get('id')
  168. if not item_id:
  169. continue
  170. entries.append(self.url_result(
  171. 'https://vrv.co/watch/' + item_id,
  172. self.ie_key(), item_id, item.get('title')))
  173. return self.playlist_result(entries, video_id, title, description)
  174. video_data = items[0]
  175. streams_path = video_data['__links__'].get('streams', {}).get('href')
  176. if not streams_path:
  177. self.raise_login_required()
  178. streams_json = self._call_cms(streams_path, video_id, 'streams')
  179. audio_locale = streams_json.get('audio_locale')
  180. formats = []
  181. for stream_type, streams in streams_json.get('streams', {}).items():
  182. if stream_type in ('adaptive_hls', 'adaptive_dash'):
  183. for stream in streams.values():
  184. formats.extend(self._extract_vrv_formats(
  185. stream.get('url'), video_id, stream_type.split('_')[1],
  186. audio_locale, stream.get('hardsub_locale')))
  187. self._sort_formats(formats)
  188. subtitles = {}
  189. for k in ('captions', 'subtitles'):
  190. for subtitle in streams_json.get(k, {}).values():
  191. subtitle_url = subtitle.get('url')
  192. if not subtitle_url:
  193. continue
  194. subtitles.setdefault(subtitle.get('locale', 'en-US'), []).append({
  195. 'url': subtitle_url,
  196. 'ext': subtitle.get('format', 'ass'),
  197. })
  198. thumbnails = []
  199. for thumbnail in video_data.get('images', {}).get('thumbnails', []):
  200. thumbnail_url = thumbnail.get('source')
  201. if not thumbnail_url:
  202. continue
  203. thumbnails.append({
  204. 'url': thumbnail_url,
  205. 'width': int_or_none(thumbnail.get('width')),
  206. 'height': int_or_none(thumbnail.get('height')),
  207. })
  208. return {
  209. 'id': video_id,
  210. 'title': title,
  211. 'formats': formats,
  212. 'subtitles': subtitles,
  213. 'thumbnails': thumbnails,
  214. 'description': description,
  215. 'duration': float_or_none(video_data.get('duration_ms'), 1000),
  216. 'uploader_id': video_data.get('channel_id'),
  217. 'series': video_data.get('series_title'),
  218. 'season': video_data.get('season_title'),
  219. 'season_number': int_or_none(video_data.get('season_number')),
  220. 'season_id': video_data.get('season_id'),
  221. 'episode': title,
  222. 'episode_number': int_or_none(video_data.get('episode_number')),
  223. 'episode_id': video_data.get('production_episode_id'),
  224. }
  225. class VRVSeriesIE(VRVBaseIE):
  226. IE_NAME = 'vrv:series'
  227. _VALID_URL = r'https?://(?:www\.)?vrv\.co/series/(?P<id>[A-Z0-9]+)'
  228. _TEST = {
  229. 'url': 'https://vrv.co/series/G68VXG3G6/The-Perfect-Insider',
  230. 'info_dict': {
  231. 'id': 'G68VXG3G6',
  232. },
  233. 'playlist_mincount': 11,
  234. }
  235. def _real_extract(self, url):
  236. series_id = self._match_id(url)
  237. seasons_path = self._get_cms_resource(
  238. 'cms:/seasons?series_id=' + series_id, series_id)
  239. seasons_data = self._call_cms(seasons_path, series_id, 'seasons')
  240. entries = []
  241. for season in seasons_data.get('items', []):
  242. episodes_path = season['__links__']['season/episodes']['href']
  243. episodes = self._call_cms(episodes_path, series_id, 'episodes')
  244. for episode in episodes.get('items', []):
  245. episode_id = episode['id']
  246. entries.append(self.url_result(
  247. 'https://vrv.co/watch/' + episode_id,
  248. 'VRV', episode_id, episode.get('title')))
  249. return self.playlist_result(entries, series_id)