|
|
@ -4,20 +4,24 @@ from __future__ import unicode_literals, division |
|
|
|
import re |
|
|
|
|
|
|
|
from .common import InfoExtractor |
|
|
|
from ..compat import compat_str |
|
|
|
from ..compat import ( |
|
|
|
compat_str, |
|
|
|
compat_HTTPError, |
|
|
|
) |
|
|
|
from ..utils import ( |
|
|
|
determine_ext, |
|
|
|
float_or_none, |
|
|
|
int_or_none, |
|
|
|
parse_age_limit, |
|
|
|
parse_duration, |
|
|
|
ExtractorError |
|
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
class CrackleIE(InfoExtractor): |
|
|
|
_GEO_COUNTRIES = ['US'] |
|
|
|
_VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)' |
|
|
|
_TEST = { |
|
|
|
# geo restricted to CA |
|
|
|
'url': 'https://www.crackle.com/andromeda/2502343', |
|
|
|
'info_dict': { |
|
|
|
'id': '2502343', |
|
|
@ -46,93 +50,115 @@ class CrackleIE(InfoExtractor): |
|
|
|
def _real_extract(self, url): |
|
|
|
video_id = self._match_id(url) |
|
|
|
|
|
|
|
media = self._download_json( |
|
|
|
'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s' |
|
|
|
% (video_id, self._GEO_COUNTRIES[0]), video_id, query={ |
|
|
|
'disableProtocols': 'true', |
|
|
|
'format': 'json' |
|
|
|
}) |
|
|
|
country_code = self._downloader.params.get('geo_bypass_country', None) |
|
|
|
countries = [country_code] if country_code else ( |
|
|
|
'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI') |
|
|
|
|
|
|
|
title = media['Title'] |
|
|
|
last_e = None |
|
|
|
|
|
|
|
formats = [] |
|
|
|
for e in media['MediaURLs']: |
|
|
|
if e.get('UseDRM') is True: |
|
|
|
continue |
|
|
|
format_url = e.get('Path') |
|
|
|
if not format_url or not isinstance(format_url, compat_str): |
|
|
|
continue |
|
|
|
ext = determine_ext(format_url) |
|
|
|
if ext == 'm3u8': |
|
|
|
formats.extend(self._extract_m3u8_formats( |
|
|
|
format_url, video_id, 'mp4', entry_protocol='m3u8_native', |
|
|
|
m3u8_id='hls', fatal=False)) |
|
|
|
elif ext == 'mpd': |
|
|
|
formats.extend(self._extract_mpd_formats( |
|
|
|
format_url, video_id, mpd_id='dash', fatal=False)) |
|
|
|
self._sort_formats(formats) |
|
|
|
|
|
|
|
description = media.get('Description') |
|
|
|
duration = int_or_none(media.get( |
|
|
|
'DurationInSeconds')) or parse_duration(media.get('Duration')) |
|
|
|
view_count = int_or_none(media.get('CountViews')) |
|
|
|
average_rating = float_or_none(media.get('UserRating')) |
|
|
|
age_limit = parse_age_limit(media.get('Rating')) |
|
|
|
genre = media.get('Genre') |
|
|
|
release_year = int_or_none(media.get('ReleaseYear')) |
|
|
|
creator = media.get('Directors') |
|
|
|
artist = media.get('Cast') |
|
|
|
|
|
|
|
if media.get('MediaTypeDisplayValue') == 'Full Episode': |
|
|
|
series = media.get('ShowName') |
|
|
|
episode = title |
|
|
|
season_number = int_or_none(media.get('Season')) |
|
|
|
episode_number = int_or_none(media.get('Episode')) |
|
|
|
else: |
|
|
|
series = episode = season_number = episode_number = None |
|
|
|
|
|
|
|
subtitles = {} |
|
|
|
cc_files = media.get('ClosedCaptionFiles') |
|
|
|
if isinstance(cc_files, list): |
|
|
|
for cc_file in cc_files: |
|
|
|
if not isinstance(cc_file, dict): |
|
|
|
for country in countries: |
|
|
|
try: |
|
|
|
media = self._download_json( |
|
|
|
'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s' |
|
|
|
% (video_id, country), video_id, |
|
|
|
'Downloading media JSON as %s' % country, |
|
|
|
'Unable to download media JSON', query={ |
|
|
|
'disableProtocols': 'true', |
|
|
|
'format': 'json' |
|
|
|
}) |
|
|
|
except ExtractorError as e: |
|
|
|
# 401 means geo restriction, trying next country |
|
|
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: |
|
|
|
last_e = e |
|
|
|
continue |
|
|
|
cc_url = cc_file.get('Path') |
|
|
|
if not cc_url or not isinstance(cc_url, compat_str): |
|
|
|
raise |
|
|
|
|
|
|
|
media_urls = media.get('MediaURLs') |
|
|
|
if not media_urls or not isinstance(media_urls, list): |
|
|
|
continue |
|
|
|
|
|
|
|
title = media['Title'] |
|
|
|
|
|
|
|
formats = [] |
|
|
|
for e in media['MediaURLs']: |
|
|
|
if e.get('UseDRM') is True: |
|
|
|
continue |
|
|
|
lang = cc_file.get('Locale') or 'en' |
|
|
|
subtitles.setdefault(lang, []).append({'url': cc_url}) |
|
|
|
|
|
|
|
thumbnails = [] |
|
|
|
images = media.get('Images') |
|
|
|
if isinstance(images, list): |
|
|
|
for image_key, image_url in images.items(): |
|
|
|
mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key) |
|
|
|
if not mobj: |
|
|
|
format_url = e.get('Path') |
|
|
|
if not format_url or not isinstance(format_url, compat_str): |
|
|
|
continue |
|
|
|
thumbnails.append({ |
|
|
|
'url': image_url, |
|
|
|
'width': int(mobj.group(1)), |
|
|
|
'height': int(mobj.group(2)), |
|
|
|
}) |
|
|
|
|
|
|
|
return { |
|
|
|
'id': video_id, |
|
|
|
'title': title, |
|
|
|
'description': description, |
|
|
|
'duration': duration, |
|
|
|
'view_count': view_count, |
|
|
|
'average_rating': average_rating, |
|
|
|
'age_limit': age_limit, |
|
|
|
'genre': genre, |
|
|
|
'creator': creator, |
|
|
|
'artist': artist, |
|
|
|
'release_year': release_year, |
|
|
|
'series': series, |
|
|
|
'episode': episode, |
|
|
|
'season_number': season_number, |
|
|
|
'episode_number': episode_number, |
|
|
|
'thumbnails': thumbnails, |
|
|
|
'subtitles': subtitles, |
|
|
|
'formats': formats, |
|
|
|
} |
|
|
|
ext = determine_ext(format_url) |
|
|
|
if ext == 'm3u8': |
|
|
|
formats.extend(self._extract_m3u8_formats( |
|
|
|
format_url, video_id, 'mp4', entry_protocol='m3u8_native', |
|
|
|
m3u8_id='hls', fatal=False)) |
|
|
|
elif ext == 'mpd': |
|
|
|
formats.extend(self._extract_mpd_formats( |
|
|
|
format_url, video_id, mpd_id='dash', fatal=False)) |
|
|
|
self._sort_formats(formats) |
|
|
|
|
|
|
|
description = media.get('Description') |
|
|
|
duration = int_or_none(media.get( |
|
|
|
'DurationInSeconds')) or parse_duration(media.get('Duration')) |
|
|
|
view_count = int_or_none(media.get('CountViews')) |
|
|
|
average_rating = float_or_none(media.get('UserRating')) |
|
|
|
age_limit = parse_age_limit(media.get('Rating')) |
|
|
|
genre = media.get('Genre') |
|
|
|
release_year = int_or_none(media.get('ReleaseYear')) |
|
|
|
creator = media.get('Directors') |
|
|
|
artist = media.get('Cast') |
|
|
|
|
|
|
|
if media.get('MediaTypeDisplayValue') == 'Full Episode': |
|
|
|
series = media.get('ShowName') |
|
|
|
episode = title |
|
|
|
season_number = int_or_none(media.get('Season')) |
|
|
|
episode_number = int_or_none(media.get('Episode')) |
|
|
|
else: |
|
|
|
series = episode = season_number = episode_number = None |
|
|
|
|
|
|
|
subtitles = {} |
|
|
|
cc_files = media.get('ClosedCaptionFiles') |
|
|
|
if isinstance(cc_files, list): |
|
|
|
for cc_file in cc_files: |
|
|
|
if not isinstance(cc_file, dict): |
|
|
|
continue |
|
|
|
cc_url = cc_file.get('Path') |
|
|
|
if not cc_url or not isinstance(cc_url, compat_str): |
|
|
|
continue |
|
|
|
lang = cc_file.get('Locale') or 'en' |
|
|
|
subtitles.setdefault(lang, []).append({'url': cc_url}) |
|
|
|
|
|
|
|
thumbnails = [] |
|
|
|
images = media.get('Images') |
|
|
|
if isinstance(images, list): |
|
|
|
for image_key, image_url in images.items(): |
|
|
|
mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key) |
|
|
|
if not mobj: |
|
|
|
continue |
|
|
|
thumbnails.append({ |
|
|
|
'url': image_url, |
|
|
|
'width': int(mobj.group(1)), |
|
|
|
'height': int(mobj.group(2)), |
|
|
|
}) |
|
|
|
|
|
|
|
return { |
|
|
|
'id': video_id, |
|
|
|
'title': title, |
|
|
|
'description': description, |
|
|
|
'duration': duration, |
|
|
|
'view_count': view_count, |
|
|
|
'average_rating': average_rating, |
|
|
|
'age_limit': age_limit, |
|
|
|
'genre': genre, |
|
|
|
'creator': creator, |
|
|
|
'artist': artist, |
|
|
|
'release_year': release_year, |
|
|
|
'series': series, |
|
|
|
'episode': episode, |
|
|
|
'season_number': season_number, |
|
|
|
'episode_number': episode_number, |
|
|
|
'thumbnails': thumbnails, |
|
|
|
'subtitles': subtitles, |
|
|
|
'formats': formats, |
|
|
|
} |
|
|
|
|
|
|
|
raise last_e |