You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

200 lines
7.4 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals, division
  3. import hashlib
  4. import hmac
  5. import re
  6. import time
  7. from .common import InfoExtractor
  8. from ..compat import compat_HTTPError
  9. from ..utils import (
  10. determine_ext,
  11. float_or_none,
  12. int_or_none,
  13. parse_age_limit,
  14. parse_duration,
  15. url_or_none,
  16. ExtractorError
  17. )
  class CrackleIE(InfoExtractor):
      """Extractor for Crackle media.

      Matches ``crackle:<id>`` identifiers as well as crackle.com and
      sonycrackle.com page URLs that end in a numeric media id (see
      ``_VALID_URL``).
      """

      _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?(?:sony)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
      _TESTS = [{
          # geo restricted to CA
          'url': 'https://www.crackle.com/andromeda/2502343',
          'info_dict': {
              'id': '2502343',
              'ext': 'mp4',
              'title': 'Under The Night',
              'description': 'md5:d2b8ca816579ae8a7bf28bfff8cefc8a',
              'duration': 2583,
              'view_count': int,
              'average_rating': 0,
              'age_limit': 14,
              'genre': 'Action, Sci-Fi',
              'creator': 'Allan Kroeker',
              'artist': 'Keith Hamilton Cobb, Kevin Sorbo, Lisa Ryder, Lexa Doig, Robert Hewitt Wolfe',
              'release_year': 2000,
              'series': 'Andromeda',
              'episode': 'Under The Night',
              'season_number': 1,
              'episode_number': 1,
          },
          'params': {
              # m3u8 download
              'skip_download': True,
          }
      }, {
          'url': 'https://www.sonycrackle.com/andromeda/2502343',
          'only_matching': True,
      }]

      # Frame size for each plain HTTP media slot, keyed by the ``Type`` value
      # of a ``MediaURLs`` entry.  Entries whose type is not listed here are
      # skipped when building the non-manifest formats.
      _MEDIA_FILE_SLOTS = {
          '360p.mp4': {
              'width': 640,
              'height': 360,
          },
          '480p.mp4': {
              'width': 768,
              'height': 432,
          },
          '480p_1mbps.mp4': {
              'width': 852,
              'height': 480,
          },
      }

      def _build_formats(self, media_urls, video_id):
          """Return the list of format dicts for the given ``MediaURLs`` list.

          DRM-flagged entries, entries without a valid ``Path`` URL and
          entries that are not dicts are skipped.  HLS, DASH and ISM manifests
          are expanded non-fatally; any other URL is accepted only when its
          ``Type`` is a key of ``_MEDIA_FILE_SLOTS``.
          """
          formats = []
          for media_url in media_urls:
              # Malformed (non-dict) entries would otherwise crash on .get().
              if not isinstance(media_url, dict):
                  continue
              if media_url.get('UseDRM') is True:
                  continue
              format_url = url_or_none(media_url.get('Path'))
              if not format_url:
                  continue
              ext = determine_ext(format_url)
              if ext == 'm3u8':
                  formats.extend(self._extract_m3u8_formats(
                      format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                      m3u8_id='hls', fatal=False))
              elif ext == 'mpd':
                  formats.extend(self._extract_mpd_formats(
                      format_url, video_id, mpd_id='dash', fatal=False))
              elif format_url.endswith('.ism/Manifest'):
                  formats.extend(self._extract_ism_formats(
                      format_url, video_id, ism_id='mss', fatal=False))
              else:
                  mfs_path = media_url.get('Type')
                  mfs_info = self._MEDIA_FILE_SLOTS.get(mfs_path)
                  if not mfs_info:
                      continue
                  formats.append({
                      'url': format_url,
                      'format_id': 'http-' + mfs_path.split('.')[0],
                      'width': mfs_info['width'],
                      'height': mfs_info['height'],
                  })
          return formats

      @staticmethod
      def _build_subtitles(cc_files):
          """Return a subtitles dict built from ``ClosedCaptionFiles``.

          Each dict entry with a valid ``Path`` URL is filed under its
          ``Locale`` (``'en'`` when the locale is missing or empty).  Anything
          that is not a list yields an empty dict.
          """
          subtitles = {}
          if not isinstance(cc_files, list):
              return subtitles
          for cc_file in cc_files:
              if not isinstance(cc_file, dict):
                  continue
              cc_url = url_or_none(cc_file.get('Path'))
              if not cc_url:
                  continue
              lang = cc_file.get('Locale') or 'en'
              subtitles.setdefault(lang, []).append({'url': cc_url})
          return subtitles

      @staticmethod
      def _build_thumbnails(images):
          """Return thumbnail dicts built from the ``Images`` mapping.

          Only keys containing ``Img_<width>x<height>`` contribute; the size
          is parsed from the key.  The previous code tested for a ``list`` and
          then called ``.items()`` on it, which can never work, so the value
          is required to be a dict here.
          """
          thumbnails = []
          if not isinstance(images, dict):
              return thumbnails
          for image_key, image_url in images.items():
              mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key)
              if not mobj:
                  continue
              image_url = url_or_none(image_url)
              if not image_url:
                  continue
              thumbnails.append({
                  'url': image_url,
                  'width': int(mobj.group(1)),
                  'height': int(mobj.group(2)),
              })
          return thumbnails

      def _real_extract(self, url):
          """Extract the info dict for the media referenced by ``url``.

          The media details endpoint is queried once per candidate country
          (the ``geo_bypass_country`` parameter if set, otherwise a fixed
          list) until a response with a non-empty ``MediaURLs`` list is
          obtained.

          Raises:
              ExtractorError: re-raised immediately for any download failure
                  other than HTTP 401; raised after the loop with the last
                  401 error, or with a generic message when no country
                  produced usable media URLs.
              KeyError: if the selected media JSON has no ``Title``.
          """
          video_id = self._match_id(url)

          country_code = self._downloader.params.get('geo_bypass_country', None)
          countries = [country_code] if country_code else (
              'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI')

          last_e = None

          for country in countries:
              try:
                  # Authorization generation algorithm is reverse engineered from:
                  # https://www.sonycrackle.com/static/js/main.ea93451f.chunk.js
                  media_detail_url = 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s?disableProtocols=true' % (video_id, country)
                  # UTC timestamp with minute resolution; it is both signed
                  # and sent along in the Authorization header.
                  timestamp = time.strftime('%Y%m%d%H%M', time.gmtime())
                  h = hmac.new(b'IGSLUQCBDFHEOIFM', '|'.join([media_detail_url, timestamp]).encode(), hashlib.sha1).hexdigest().upper()
                  media = self._download_json(
                      media_detail_url, video_id, 'Downloading media JSON as %s' % country,
                      'Unable to download media JSON', headers={
                          'Accept': 'application/json',
                          # NOTE(review): the meaning of the trailing '117'
                          # and '1' fields is not visible here - confirm
                          # against the referenced JS before changing them.
                          'Authorization': '|'.join([h, timestamp, '117', '1']),
                      })
              except ExtractorError as e:
                  # 401 means geo restriction, trying next country
                  if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                      last_e = e
                      continue
                  raise

              media_urls = media.get('MediaURLs')
              if not media_urls or not isinstance(media_urls, list):
                  continue

              title = media['Title']

              formats = self._build_formats(media_urls, video_id)
              self._sort_formats(formats)

              description = media.get('Description')
              duration = int_or_none(media.get(
                  'DurationInSeconds')) or parse_duration(media.get('Duration'))
              view_count = int_or_none(media.get('CountViews'))
              average_rating = float_or_none(media.get('UserRating'))
              age_limit = parse_age_limit(media.get('Rating'))
              genre = media.get('Genre')
              release_year = int_or_none(media.get('ReleaseYear'))
              creator = media.get('Directors')
              artist = media.get('Cast')

              # Series/episode metadata is only filled in for full episodes.
              if media.get('MediaTypeDisplayValue') == 'Full Episode':
                  series = media.get('ShowName')
                  episode = title
                  season_number = int_or_none(media.get('Season'))
                  episode_number = int_or_none(media.get('Episode'))
              else:
                  series = episode = season_number = episode_number = None

              subtitles = self._build_subtitles(media.get('ClosedCaptionFiles'))
              thumbnails = self._build_thumbnails(media.get('Images'))

              return {
                  'id': video_id,
                  'title': title,
                  'description': description,
                  'duration': duration,
                  'view_count': view_count,
                  'average_rating': average_rating,
                  'age_limit': age_limit,
                  'genre': genre,
                  'creator': creator,
                  'artist': artist,
                  'release_year': release_year,
                  'series': series,
                  'episode': episode,
                  'season_number': season_number,
                  'episode_number': episode_number,
                  'thumbnails': thumbnails,
                  'subtitles': subtitles,
                  'formats': formats,
              }

          # Every country was either rejected with 401 or returned no usable
          # MediaURLs.  last_e is still None in the latter case, and raising
          # None would itself fail, so fall back to an explicit error.
          if last_e is not None:
              raise last_e
          raise ExtractorError(
              'No media URLs found for %s in any of the tried countries' % video_id)