You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

374 lines
15 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import compat_HTTPError
  6. from ..utils import (
  7. determine_ext,
  8. float_or_none,
  9. int_or_none,
  10. smuggle_url,
  11. unsmuggle_url,
  12. ExtractorError,
  13. )
  14. class LimelightBaseIE(InfoExtractor):
  15. _PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
  16. _API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'
  17. @classmethod
  18. def _extract_urls(cls, webpage, source_url):
  19. lm = {
  20. 'Media': 'media',
  21. 'Channel': 'channel',
  22. 'ChannelList': 'channel_list',
  23. }
  24. def smuggle(url):
  25. return smuggle_url(url, {'source_url': source_url})
  26. entries = []
  27. for kind, video_id in re.findall(
  28. r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})',
  29. webpage):
  30. entries.append(cls.url_result(
  31. smuggle('limelight:%s:%s' % (lm[kind], video_id)),
  32. 'Limelight%s' % kind, video_id))
  33. for mobj in re.finditer(
  34. # As per [1] class attribute should be exactly equal to
  35. # LimelightEmbeddedPlayerFlash but numerous examples seen
  36. # that don't exactly match it (e.g. [2]).
  37. # 1. http://support.3playmedia.com/hc/en-us/articles/227732408-Limelight-Embedding-the-Captions-Plugin-with-the-Limelight-Player-on-Your-Webpage
  38. # 2. http://www.sedona.com/FacilitatorTraining2017
  39. r'''(?sx)
  40. <object[^>]+class=(["\'])(?:(?!\1).)*\bLimelightEmbeddedPlayerFlash\b(?:(?!\1).)*\1[^>]*>.*?
  41. <param[^>]+
  42. name=(["\'])flashVars\2[^>]+
  43. value=(["\'])(?:(?!\3).)*(?P<kind>media|channel(?:List)?)Id=(?P<id>[a-z0-9]{32})
  44. ''', webpage):
  45. kind, video_id = mobj.group('kind'), mobj.group('id')
  46. entries.append(cls.url_result(
  47. smuggle('limelight:%s:%s' % (kind, video_id)),
  48. 'Limelight%s' % kind.capitalize(), video_id))
  49. # http://support.3playmedia.com/hc/en-us/articles/115009517327-Limelight-Embedding-the-Audio-Description-Plugin-with-the-Limelight-Player-on-Your-Web-Page)
  50. for video_id in re.findall(
  51. r'(?s)LimelightPlayerUtil\.embed\s*\(\s*{.*?\bmediaId["\']\s*:\s*["\'](?P<id>[a-z0-9]{32})',
  52. webpage):
  53. entries.append(cls.url_result(
  54. smuggle('limelight:media:%s' % video_id),
  55. LimelightMediaIE.ie_key(), video_id))
  56. return entries
  57. def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
  58. headers = {}
  59. if referer:
  60. headers['Referer'] = referer
  61. try:
  62. return self._download_json(
  63. self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
  64. item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal, headers=headers)
  65. except ExtractorError as e:
  66. if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
  67. error = self._parse_json(e.cause.read().decode(), item_id)['detail']['contentAccessPermission']
  68. if error == 'CountryDisabled':
  69. self.raise_geo_restricted()
  70. raise ExtractorError(error, expected=True)
  71. raise
  72. def _call_api(self, organization_id, item_id, method):
  73. return self._download_json(
  74. self._API_URL % (organization_id, self._API_PATH, item_id, method),
  75. item_id, 'Downloading API %s JSON' % method)
  76. def _extract(self, item_id, pc_method, mobile_method, meta_method, referer=None):
  77. pc = self._call_playlist_service(item_id, pc_method, referer=referer)
  78. metadata = self._call_api(pc['orgId'], item_id, meta_method)
  79. mobile = self._call_playlist_service(item_id, mobile_method, fatal=False, referer=referer)
  80. return pc, mobile, metadata
  81. def _extract_info(self, streams, mobile_urls, properties):
  82. video_id = properties['media_id']
  83. formats = []
  84. urls = []
  85. for stream in streams:
  86. stream_url = stream.get('url')
  87. if not stream_url or stream.get('drmProtected') or stream_url in urls:
  88. continue
  89. urls.append(stream_url)
  90. ext = determine_ext(stream_url)
  91. if ext == 'f4m':
  92. formats.extend(self._extract_f4m_formats(
  93. stream_url, video_id, f4m_id='hds', fatal=False))
  94. else:
  95. fmt = {
  96. 'url': stream_url,
  97. 'abr': float_or_none(stream.get('audioBitRate')),
  98. 'fps': float_or_none(stream.get('videoFrameRate')),
  99. 'ext': ext,
  100. }
  101. width = int_or_none(stream.get('videoWidthInPixels'))
  102. height = int_or_none(stream.get('videoHeightInPixels'))
  103. vbr = float_or_none(stream.get('videoBitRate'))
  104. if width or height or vbr:
  105. fmt.update({
  106. 'width': width,
  107. 'height': height,
  108. 'vbr': vbr,
  109. })
  110. else:
  111. fmt['vcodec'] = 'none'
  112. rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', stream_url)
  113. if rtmp:
  114. format_id = 'rtmp'
  115. if stream.get('videoBitRate'):
  116. format_id += '-%d' % int_or_none(stream['videoBitRate'])
  117. http_format_id = format_id.replace('rtmp', 'http')
  118. CDN_HOSTS = (
  119. ('delvenetworks.com', 'cpl.delvenetworks.com'),
  120. ('video.llnw.net', 's2.content.video.llnw.net'),
  121. )
  122. for cdn_host, http_host in CDN_HOSTS:
  123. if cdn_host not in rtmp.group('host').lower():
  124. continue
  125. http_url = 'http://%s/%s' % (http_host, rtmp.group('playpath')[4:])
  126. urls.append(http_url)
  127. if self._is_valid_url(http_url, video_id, http_format_id):
  128. http_fmt = fmt.copy()
  129. http_fmt.update({
  130. 'url': http_url,
  131. 'format_id': http_format_id,
  132. })
  133. formats.append(http_fmt)
  134. break
  135. fmt.update({
  136. 'url': rtmp.group('url'),
  137. 'play_path': rtmp.group('playpath'),
  138. 'app': rtmp.group('app'),
  139. 'ext': 'flv',
  140. 'format_id': format_id,
  141. })
  142. formats.append(fmt)
  143. for mobile_url in mobile_urls:
  144. media_url = mobile_url.get('mobileUrl')
  145. format_id = mobile_url.get('targetMediaPlatform')
  146. if not media_url or format_id in ('Widevine', 'SmoothStreaming') or media_url in urls:
  147. continue
  148. urls.append(media_url)
  149. ext = determine_ext(media_url)
  150. if ext == 'm3u8':
  151. formats.extend(self._extract_m3u8_formats(
  152. media_url, video_id, 'mp4', 'm3u8_native',
  153. m3u8_id=format_id, fatal=False))
  154. elif ext == 'f4m':
  155. formats.extend(self._extract_f4m_formats(
  156. stream_url, video_id, f4m_id=format_id, fatal=False))
  157. else:
  158. formats.append({
  159. 'url': media_url,
  160. 'format_id': format_id,
  161. 'preference': -1,
  162. 'ext': ext,
  163. })
  164. self._sort_formats(formats)
  165. title = properties['title']
  166. description = properties.get('description')
  167. timestamp = int_or_none(properties.get('publish_date') or properties.get('create_date'))
  168. duration = float_or_none(properties.get('duration_in_milliseconds'), 1000)
  169. filesize = int_or_none(properties.get('total_storage_in_bytes'))
  170. categories = [properties.get('category')]
  171. tags = properties.get('tags', [])
  172. thumbnails = [{
  173. 'url': thumbnail['url'],
  174. 'width': int_or_none(thumbnail.get('width')),
  175. 'height': int_or_none(thumbnail.get('height')),
  176. } for thumbnail in properties.get('thumbnails', []) if thumbnail.get('url')]
  177. subtitles = {}
  178. for caption in properties.get('captions', []):
  179. lang = caption.get('language_code')
  180. subtitles_url = caption.get('url')
  181. if lang and subtitles_url:
  182. subtitles.setdefault(lang, []).append({
  183. 'url': subtitles_url,
  184. })
  185. closed_captions_url = properties.get('closed_captions_url')
  186. if closed_captions_url:
  187. subtitles.setdefault('en', []).append({
  188. 'url': closed_captions_url,
  189. 'ext': 'ttml',
  190. })
  191. return {
  192. 'id': video_id,
  193. 'title': title,
  194. 'description': description,
  195. 'formats': formats,
  196. 'timestamp': timestamp,
  197. 'duration': duration,
  198. 'filesize': filesize,
  199. 'categories': categories,
  200. 'tags': tags,
  201. 'thumbnails': thumbnails,
  202. 'subtitles': subtitles,
  203. }
  204. class LimelightMediaIE(LimelightBaseIE):
  205. IE_NAME = 'limelight'
  206. _VALID_URL = r'''(?x)
  207. (?:
  208. limelight:media:|
  209. https?://
  210. (?:
  211. link\.videoplatform\.limelight\.com/media/|
  212. assets\.delvenetworks\.com/player/loader\.swf
  213. )
  214. \?.*?\bmediaId=
  215. )
  216. (?P<id>[a-z0-9]{32})
  217. '''
  218. _TESTS = [{
  219. 'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86',
  220. 'info_dict': {
  221. 'id': '3ffd040b522b4485b6d84effc750cd86',
  222. 'ext': 'mp4',
  223. 'title': 'HaP and the HB Prince Trailer',
  224. 'description': 'md5:8005b944181778e313d95c1237ddb640',
  225. 'thumbnail': r're:^https?://.*\.jpeg$',
  226. 'duration': 144.23,
  227. 'timestamp': 1244136834,
  228. 'upload_date': '20090604',
  229. },
  230. 'params': {
  231. # m3u8 download
  232. 'skip_download': True,
  233. },
  234. }, {
  235. # video with subtitles
  236. 'url': 'limelight:media:a3e00274d4564ec4a9b29b9466432335',
  237. 'md5': '2fa3bad9ac321e23860ca23bc2c69e3d',
  238. 'info_dict': {
  239. 'id': 'a3e00274d4564ec4a9b29b9466432335',
  240. 'ext': 'mp4',
  241. 'title': '3Play Media Overview Video',
  242. 'thumbnail': r're:^https?://.*\.jpeg$',
  243. 'duration': 78.101,
  244. 'timestamp': 1338929955,
  245. 'upload_date': '20120605',
  246. 'subtitles': 'mincount:9',
  247. },
  248. }, {
  249. 'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452',
  250. 'only_matching': True,
  251. }]
  252. _PLAYLIST_SERVICE_PATH = 'media'
  253. _API_PATH = 'media'
  254. def _real_extract(self, url):
  255. url, smuggled_data = unsmuggle_url(url, {})
  256. video_id = self._match_id(url)
  257. self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
  258. pc, mobile, metadata = self._extract(
  259. video_id, 'getPlaylistByMediaId',
  260. 'getMobilePlaylistByMediaId', 'properties',
  261. smuggled_data.get('source_url'))
  262. return self._extract_info(
  263. pc['playlistItems'][0].get('streams', []),
  264. mobile['mediaList'][0].get('mobileUrls', []) if mobile else [],
  265. metadata)
  266. class LimelightChannelIE(LimelightBaseIE):
  267. IE_NAME = 'limelight:channel'
  268. _VALID_URL = r'''(?x)
  269. (?:
  270. limelight:channel:|
  271. https?://
  272. (?:
  273. link\.videoplatform\.limelight\.com/media/|
  274. assets\.delvenetworks\.com/player/loader\.swf
  275. )
  276. \?.*?\bchannelId=
  277. )
  278. (?P<id>[a-z0-9]{32})
  279. '''
  280. _TESTS = [{
  281. 'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082',
  282. 'info_dict': {
  283. 'id': 'ab6a524c379342f9b23642917020c082',
  284. 'title': 'Javascript Sample Code',
  285. },
  286. 'playlist_mincount': 3,
  287. }, {
  288. 'url': 'http://assets.delvenetworks.com/player/loader.swf?channelId=ab6a524c379342f9b23642917020c082',
  289. 'only_matching': True,
  290. }]
  291. _PLAYLIST_SERVICE_PATH = 'channel'
  292. _API_PATH = 'channels'
  293. def _real_extract(self, url):
  294. url, smuggled_data = unsmuggle_url(url, {})
  295. channel_id = self._match_id(url)
  296. pc, mobile, medias = self._extract(
  297. channel_id, 'getPlaylistByChannelId',
  298. 'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1',
  299. 'media', smuggled_data.get('source_url'))
  300. entries = [
  301. self._extract_info(
  302. pc['playlistItems'][i].get('streams', []),
  303. mobile['mediaList'][i].get('mobileUrls', []) if mobile else [],
  304. medias['media_list'][i])
  305. for i in range(len(medias['media_list']))]
  306. return self.playlist_result(entries, channel_id, pc['title'])
  307. class LimelightChannelListIE(LimelightBaseIE):
  308. IE_NAME = 'limelight:channel_list'
  309. _VALID_URL = r'''(?x)
  310. (?:
  311. limelight:channel_list:|
  312. https?://
  313. (?:
  314. link\.videoplatform\.limelight\.com/media/|
  315. assets\.delvenetworks\.com/player/loader\.swf
  316. )
  317. \?.*?\bchannelListId=
  318. )
  319. (?P<id>[a-z0-9]{32})
  320. '''
  321. _TESTS = [{
  322. 'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b',
  323. 'info_dict': {
  324. 'id': '301b117890c4465c8179ede21fd92e2b',
  325. 'title': 'Website - Hero Player',
  326. },
  327. 'playlist_mincount': 2,
  328. }, {
  329. 'url': 'https://assets.delvenetworks.com/player/loader.swf?channelListId=301b117890c4465c8179ede21fd92e2b',
  330. 'only_matching': True,
  331. }]
  332. _PLAYLIST_SERVICE_PATH = 'channel_list'
  333. def _real_extract(self, url):
  334. channel_list_id = self._match_id(url)
  335. channel_list = self._call_playlist_service(channel_list_id, 'getMobileChannelListById')
  336. entries = [
  337. self.url_result('limelight:channel:%s' % channel['id'], 'LimelightChannel')
  338. for channel in channel_list['channelList']]
  339. return self.playlist_result(entries, channel_list_id, channel_list['title'])