You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

313 lines
11 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import hashlib
  4. import random
  5. import re
  6. import time
  7. from .common import InfoExtractor
  8. from ..compat import compat_str
  9. from ..utils import (
  10. ExtractorError,
  11. int_or_none,
  12. parse_duration,
  13. try_get,
  14. urlencode_postdata,
  15. )
  16. class NexxIE(InfoExtractor):
  17. _VALID_URL = r'''(?x)
  18. (?:
  19. https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/|
  20. nexx:(?P<domain_id_s>\d+):
  21. )
  22. (?P<id>\d+)
  23. '''
  24. _TESTS = [{
  25. # movie
  26. 'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907',
  27. 'md5': '828cea195be04e66057b846288295ba1',
  28. 'info_dict': {
  29. 'id': '128907',
  30. 'ext': 'mp4',
  31. 'title': 'Stiftung Warentest',
  32. 'alt_title': 'Wie ein Test abläuft',
  33. 'description': 'md5:d1ddb1ef63de721132abd38639cc2fd2',
  34. 'release_year': 2013,
  35. 'creator': 'SPIEGEL TV',
  36. 'thumbnail': r're:^https?://.*\.jpg$',
  37. 'duration': 2509,
  38. 'timestamp': 1384264416,
  39. 'upload_date': '20131112',
  40. },
  41. }, {
  42. # episode
  43. 'url': 'https://api.nexx.cloud/v3/741/videos/byid/247858',
  44. 'info_dict': {
  45. 'id': '247858',
  46. 'ext': 'mp4',
  47. 'title': 'Return of the Golden Child (OV)',
  48. 'description': 'md5:5d969537509a92b733de21bae249dc63',
  49. 'release_year': 2017,
  50. 'thumbnail': r're:^https?://.*\.jpg$',
  51. 'duration': 1397,
  52. 'timestamp': 1495033267,
  53. 'upload_date': '20170517',
  54. 'episode_number': 2,
  55. 'season_number': 2,
  56. },
  57. 'params': {
  58. 'skip_download': True,
  59. },
  60. }, {
  61. 'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
  62. 'only_matching': True,
  63. }, {
  64. 'url': 'nexx:748:128907',
  65. 'only_matching': True,
  66. }]
  67. @staticmethod
  68. def _extract_domain_id(webpage):
  69. mobj = re.search(
  70. r'<script\b[^>]+\bsrc=["\'](?:https?:)?//require\.nexx(?:\.cloud|cdn\.com)/(?P<id>\d+)',
  71. webpage)
  72. return mobj.group('id') if mobj else None
  73. @staticmethod
  74. def _extract_urls(webpage):
  75. # Reference:
  76. # 1. https://nx-s.akamaized.net/files/201510/44.pdf
  77. entries = []
  78. # JavaScript Integration
  79. domain_id = NexxIE._extract_domain_id(webpage)
  80. if domain_id:
  81. for video_id in re.findall(
  82. r'(?is)onPLAYReady.+?_play\.init\s*\(.+?\s*,\s*["\']?(\d+)',
  83. webpage):
  84. entries.append(
  85. 'https://api.nexx.cloud/v3/%s/videos/byid/%s'
  86. % (domain_id, video_id))
  87. # TODO: support more embed formats
  88. return entries
  89. @staticmethod
  90. def _extract_url(webpage):
  91. return NexxIE._extract_urls(webpage)[0]
  92. def _handle_error(self, response):
  93. status = int_or_none(try_get(
  94. response, lambda x: x['metadata']['status']) or 200)
  95. if 200 <= status < 300:
  96. return
  97. raise ExtractorError(
  98. '%s said: %s' % (self.IE_NAME, response['metadata']['errorhint']),
  99. expected=True)
  100. def _call_api(self, domain_id, path, video_id, data=None, headers={}):
  101. headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
  102. result = self._download_json(
  103. 'https://api.nexx.cloud/v3/%s/%s' % (domain_id, path), video_id,
  104. 'Downloading %s JSON' % path, data=urlencode_postdata(data),
  105. headers=headers)
  106. self._handle_error(result)
  107. return result['result']
  108. def _real_extract(self, url):
  109. mobj = re.match(self._VALID_URL, url)
  110. domain_id = mobj.group('domain_id') or mobj.group('domain_id_s')
  111. video_id = mobj.group('id')
  112. # Reverse engineered from JS code (see getDeviceID function)
  113. device_id = '%d:%d:%d%d' % (
  114. random.randint(1, 4), int(time.time()),
  115. random.randint(1e4, 99999), random.randint(1, 9))
  116. result = self._call_api(domain_id, 'session/init', video_id, data={
  117. 'nxp_devh': device_id,
  118. 'nxp_userh': '',
  119. 'precid': '0',
  120. 'playlicense': '0',
  121. 'screenx': '1920',
  122. 'screeny': '1080',
  123. 'playerversion': '6.0.00',
  124. 'gateway': 'html5',
  125. 'adGateway': '',
  126. 'explicitlanguage': 'en-US',
  127. 'addTextTemplates': '1',
  128. 'addDomainData': '1',
  129. 'addAdModel': '1',
  130. }, headers={
  131. 'X-Request-Enable-Auth-Fallback': '1',
  132. })
  133. cid = result['general']['cid']
  134. # As described in [1] X-Request-Token generation algorithm is
  135. # as follows:
  136. # md5( operation + domain_id + domain_secret )
  137. # where domain_secret is a static value that will be given by nexx.tv
  138. # as per [1]. Here is how this "secret" is generated (reversed
  139. # from _play.api.init function, search for clienttoken). So it's
  140. # actually not static and not that much of a secret.
  141. # 1. https://nexxtvstorage.blob.core.windows.net/files/201610/27.pdf
  142. secret = result['device']['clienttoken'][int(device_id[0]):]
  143. secret = secret[0:len(secret) - int(device_id[-1])]
  144. op = 'byid'
  145. # Reversed from JS code for _play.api.call function (search for
  146. # X-Request-Token)
  147. request_token = hashlib.md5(
  148. ''.join((op, domain_id, secret)).encode('utf-8')).hexdigest()
  149. video = self._call_api(
  150. domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={
  151. 'additionalfields': 'language,channel,actors,studio,licenseby,slug,subtitle,teaser,description',
  152. 'addInteractionOptions': '1',
  153. 'addStatusDetails': '1',
  154. 'addStreamDetails': '1',
  155. 'addCaptions': '1',
  156. 'addScenes': '1',
  157. 'addHotSpots': '1',
  158. 'addBumpers': '1',
  159. 'captionFormat': 'data',
  160. }, headers={
  161. 'X-Request-CID': cid,
  162. 'X-Request-Token': request_token,
  163. })
  164. general = video['general']
  165. title = general['title']
  166. stream_data = video['streamdata']
  167. language = general.get('language_raw') or ''
  168. # TODO: reverse more cdns
  169. cdn = stream_data['cdnType']
  170. assert cdn == 'azure'
  171. azure_locator = stream_data['azureLocator']
  172. AZURE_URL = 'http://nx%s%02d.akamaized.net/'
  173. def get_cdn_shield_base(shield_type='', prefix='-p'):
  174. for secure in ('', 's'):
  175. cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper()))
  176. if cdn_shield:
  177. return 'http%s://%s' % (secure, cdn_shield)
  178. else:
  179. return AZURE_URL % (prefix, int(stream_data['azureAccount'].replace('nexxplayplus', '')))
  180. azure_stream_base = get_cdn_shield_base()
  181. is_ml = ',' in language
  182. azure_manifest_url = '%s%s/%s_src%s.ism/Manifest' % (
  183. azure_stream_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + '%s'
  184. protection_token = try_get(
  185. video, lambda x: x['protectiondata']['token'], compat_str)
  186. if protection_token:
  187. azure_manifest_url += '?hdnts=%s' % protection_token
  188. formats = self._extract_m3u8_formats(
  189. azure_manifest_url % '(format=m3u8-aapl)',
  190. video_id, 'mp4', 'm3u8_native',
  191. m3u8_id='%s-hls' % cdn, fatal=False)
  192. formats.extend(self._extract_mpd_formats(
  193. azure_manifest_url % '(format=mpd-time-csf)',
  194. video_id, mpd_id='%s-dash' % cdn, fatal=False))
  195. formats.extend(self._extract_ism_formats(
  196. azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False))
  197. azure_progressive_base = get_cdn_shield_base('Prog', '-d')
  198. azure_file_distribution = stream_data.get('azureFileDistribution')
  199. if azure_file_distribution:
  200. fds = azure_file_distribution.split(',')
  201. if fds:
  202. for fd in fds:
  203. ss = fd.split(':')
  204. if len(ss) == 2:
  205. tbr = int_or_none(ss[0])
  206. if tbr:
  207. f = {
  208. 'url': '%s%s/%s_src_%s_%d.mp4' % (
  209. azure_progressive_base, azure_locator, video_id, ss[1], tbr),
  210. 'format_id': '%s-http-%d' % (cdn, tbr),
  211. 'tbr': tbr,
  212. }
  213. width_height = ss[1].split('x')
  214. if len(width_height) == 2:
  215. f.update({
  216. 'width': int_or_none(width_height[0]),
  217. 'height': int_or_none(width_height[1]),
  218. })
  219. formats.append(f)
  220. self._sort_formats(formats)
  221. return {
  222. 'id': video_id,
  223. 'title': title,
  224. 'alt_title': general.get('subtitle'),
  225. 'description': general.get('description'),
  226. 'release_year': int_or_none(general.get('year')),
  227. 'creator': general.get('studio') or general.get('studio_adref'),
  228. 'thumbnail': try_get(
  229. video, lambda x: x['imagedata']['thumb'], compat_str),
  230. 'duration': parse_duration(general.get('runtime')),
  231. 'timestamp': int_or_none(general.get('uploaded')),
  232. 'episode_number': int_or_none(try_get(
  233. video, lambda x: x['episodedata']['episode'])),
  234. 'season_number': int_or_none(try_get(
  235. video, lambda x: x['episodedata']['season'])),
  236. 'formats': formats,
  237. }
  238. class NexxEmbedIE(InfoExtractor):
  239. _VALID_URL = r'https?://embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?P<id>[^/?#&]+)'
  240. _TEST = {
  241. 'url': 'http://embed.nexx.cloud/748/KC1614647Z27Y7T?autoplay=1',
  242. 'md5': '16746bfc28c42049492385c989b26c4a',
  243. 'info_dict': {
  244. 'id': '161464',
  245. 'ext': 'mp4',
  246. 'title': 'Nervenkitzel Achterbahn',
  247. 'alt_title': 'Karussellbauer in Deutschland',
  248. 'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc',
  249. 'release_year': 2005,
  250. 'creator': 'SPIEGEL TV',
  251. 'thumbnail': r're:^https?://.*\.jpg$',
  252. 'duration': 2761,
  253. 'timestamp': 1394021479,
  254. 'upload_date': '20140305',
  255. },
  256. 'params': {
  257. 'format': 'bestvideo',
  258. 'skip_download': True,
  259. },
  260. }
  261. @staticmethod
  262. def _extract_urls(webpage):
  263. # Reference:
  264. # 1. https://nx-s.akamaized.net/files/201510/44.pdf
  265. # iFrame Embed Integration
  266. return [mobj.group('url') for mobj in re.finditer(
  267. r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:(?!\1).)+)\1',
  268. webpage)]
  269. def _real_extract(self, url):
  270. embed_id = self._match_id(url)
  271. webpage = self._download_webpage(url, embed_id)
  272. return self.url_result(NexxIE._extract_url(webpage), ie=NexxIE.ie_key())