You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

165 lines
6.0 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals, division
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import compat_HTTPError
  6. from ..utils import (
  7. determine_ext,
  8. float_or_none,
  9. int_or_none,
  10. parse_age_limit,
  11. parse_duration,
  12. url_or_none,
  13. ExtractorError
  14. )
  15. class CrackleIE(InfoExtractor):
  16. _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?(?:sony)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
  17. _TESTS = [{
  18. # geo restricted to CA
  19. 'url': 'https://www.crackle.com/andromeda/2502343',
  20. 'info_dict': {
  21. 'id': '2502343',
  22. 'ext': 'mp4',
  23. 'title': 'Under The Night',
  24. 'description': 'md5:d2b8ca816579ae8a7bf28bfff8cefc8a',
  25. 'duration': 2583,
  26. 'view_count': int,
  27. 'average_rating': 0,
  28. 'age_limit': 14,
  29. 'genre': 'Action, Sci-Fi',
  30. 'creator': 'Allan Kroeker',
  31. 'artist': 'Keith Hamilton Cobb, Kevin Sorbo, Lisa Ryder, Lexa Doig, Robert Hewitt Wolfe',
  32. 'release_year': 2000,
  33. 'series': 'Andromeda',
  34. 'episode': 'Under The Night',
  35. 'season_number': 1,
  36. 'episode_number': 1,
  37. },
  38. 'params': {
  39. # m3u8 download
  40. 'skip_download': True,
  41. }
  42. }, {
  43. 'url': 'https://www.sonycrackle.com/andromeda/2502343',
  44. 'only_matching': True,
  45. }]
  46. def _real_extract(self, url):
  47. video_id = self._match_id(url)
  48. country_code = self._downloader.params.get('geo_bypass_country', None)
  49. countries = [country_code] if country_code else (
  50. 'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI')
  51. last_e = None
  52. for country in countries:
  53. try:
  54. media = self._download_json(
  55. 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s'
  56. % (video_id, country), video_id,
  57. 'Downloading media JSON as %s' % country,
  58. 'Unable to download media JSON', query={
  59. 'disableProtocols': 'true',
  60. 'format': 'json'
  61. })
  62. except ExtractorError as e:
  63. # 401 means geo restriction, trying next country
  64. if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
  65. last_e = e
  66. continue
  67. raise
  68. media_urls = media.get('MediaURLs')
  69. if not media_urls or not isinstance(media_urls, list):
  70. continue
  71. title = media['Title']
  72. formats = []
  73. for e in media['MediaURLs']:
  74. if e.get('UseDRM') is True:
  75. continue
  76. format_url = url_or_none(e.get('Path'))
  77. if not format_url:
  78. continue
  79. ext = determine_ext(format_url)
  80. if ext == 'm3u8':
  81. formats.extend(self._extract_m3u8_formats(
  82. format_url, video_id, 'mp4', entry_protocol='m3u8_native',
  83. m3u8_id='hls', fatal=False))
  84. elif ext == 'mpd':
  85. formats.extend(self._extract_mpd_formats(
  86. format_url, video_id, mpd_id='dash', fatal=False))
  87. self._sort_formats(formats)
  88. description = media.get('Description')
  89. duration = int_or_none(media.get(
  90. 'DurationInSeconds')) or parse_duration(media.get('Duration'))
  91. view_count = int_or_none(media.get('CountViews'))
  92. average_rating = float_or_none(media.get('UserRating'))
  93. age_limit = parse_age_limit(media.get('Rating'))
  94. genre = media.get('Genre')
  95. release_year = int_or_none(media.get('ReleaseYear'))
  96. creator = media.get('Directors')
  97. artist = media.get('Cast')
  98. if media.get('MediaTypeDisplayValue') == 'Full Episode':
  99. series = media.get('ShowName')
  100. episode = title
  101. season_number = int_or_none(media.get('Season'))
  102. episode_number = int_or_none(media.get('Episode'))
  103. else:
  104. series = episode = season_number = episode_number = None
  105. subtitles = {}
  106. cc_files = media.get('ClosedCaptionFiles')
  107. if isinstance(cc_files, list):
  108. for cc_file in cc_files:
  109. if not isinstance(cc_file, dict):
  110. continue
  111. cc_url = url_or_none(cc_file.get('Path'))
  112. if not cc_url:
  113. continue
  114. lang = cc_file.get('Locale') or 'en'
  115. subtitles.setdefault(lang, []).append({'url': cc_url})
  116. thumbnails = []
  117. images = media.get('Images')
  118. if isinstance(images, list):
  119. for image_key, image_url in images.items():
  120. mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key)
  121. if not mobj:
  122. continue
  123. thumbnails.append({
  124. 'url': image_url,
  125. 'width': int(mobj.group(1)),
  126. 'height': int(mobj.group(2)),
  127. })
  128. return {
  129. 'id': video_id,
  130. 'title': title,
  131. 'description': description,
  132. 'duration': duration,
  133. 'view_count': view_count,
  134. 'average_rating': average_rating,
  135. 'age_limit': age_limit,
  136. 'genre': genre,
  137. 'creator': creator,
  138. 'artist': artist,
  139. 'release_year': release_year,
  140. 'series': series,
  141. 'episode': episode,
  142. 'season_number': season_number,
  143. 'episode_number': episode_number,
  144. 'thumbnails': thumbnails,
  145. 'subtitles': subtitles,
  146. 'formats': formats,
  147. }
  148. raise last_e