@ -1,31 +1,41 @@
# coding: utf-8
# coding: utf-8
from __future__ import unicode_literals , division
from __future__ import unicode_literals , division
import re
from .common import InfoExtractor
from .common import InfoExtractor
from ..utils import int_or_none
from ..compat import compat_str
from ..utils import (
determine_ext ,
float_or_none ,
int_or_none ,
parse_age_limit ,
parse_duration ,
)
class CrackleIE ( InfoExtractor ) :
class CrackleIE ( InfoExtractor ) :
_GEO_COUNTRIES = [ ' US ' ]
_GEO_COUNTRIES = [ ' US ' ]
_VALID_URL = r ' (?:crackle:|https?://(?:(?:www|m) \ .)?crackle \ .com/(?:playlist/ \ d+/|(?:[^/]+/)+))(?P<id> \ d+) '
_VALID_URL = r ' (?:crackle:|https?://(?:(?:www|m) \ .)?crackle \ .com/(?:playlist/ \ d+/|(?:[^/]+/)+))(?P<id> \ d+) '
_TEST = {
_TEST = {
' url ' : ' http://www.crackle.com/comedians-in-cars-getting-coffee/2498934 ' ,
' url ' : ' https://www.crackle.com/andromeda/2502343 ' ,
' info_dict ' : {
' info_dict ' : {
' id ' : ' 2498934 ' ,
' id ' : ' 2502343 ' ,
' ext ' : ' mp4 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Everybody Respects A Bloody Nose ' ,
' description ' : ' Jerry is kaffeeklatsching in L.A. with funnyman J.B. Smoove (Saturday Night Live, Real Husbands of Hollywood). They’re headed for brew at 10 Speed Coffee in a 1964 Studebaker Avanti. ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg ' ,
' duration ' : 906 ,
' series ' : ' Comedians In Cars Getting Coffee ' ,
' season_number ' : 8 ,
' episode_number ' : 4 ,
' subtitles ' : {
' en-US ' : [
{ ' ext ' : ' vtt ' } ,
{ ' ext ' : ' tt ' } ,
]
} ,
' title ' : ' Under The Night ' ,
' description ' : ' md5:d2b8ca816579ae8a7bf28bfff8cefc8a ' ,
' duration ' : 2583 ,
' view_count ' : int ,
' average_rating ' : 0 ,
' age_limit ' : 14 ,
' genre ' : ' Action, Sci-Fi ' ,
' creator ' : ' Allan Kroeker ' ,
' artist ' : ' Keith Hamilton Cobb, Kevin Sorbo, Lisa Ryder, Lexa Doig, Robert Hewitt Wolfe ' ,
' release_year ' : 2000 ,
' series ' : ' Andromeda ' ,
' episode ' : ' Under The Night ' ,
' season_number ' : 1 ,
' episode_number ' : 1 ,
} ,
} ,
' params ' : {
' params ' : {
# m3u8 download
# m3u8 download
@ -33,108 +43,95 @@ class CrackleIE(InfoExtractor):
}
}
}
}
# (width, height) pairs, listed from smallest to largest.
# _real_extract walks this table to build one thumbnail entry per size.
_THUMBNAIL_RES = [
    (120, 90), (208, 156), (220, 124), (220, 220),
    (240, 180), (250, 141), (315, 236), (320, 180),
    (360, 203), (400, 300), (421, 316), (460, 330),
    (460, 460), (462, 260), (480, 270), (587, 330),
    (640, 480), (700, 330), (700, 394), (854, 480),
    (1024, 1024), (1920, 1080),
]
# Media file name -> frame size of that rendition.
# Values were extracted from
# http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx
# _real_extract appends each file name to the HTTP base URL to form a format.
_MEDIA_FILE_SLOTS = {
    ' c544.flv ': {' width ': 544, ' height ': 306},
    ' 360p.mp4 ': {' width ': 640, ' height ': 360},
    ' 480p.mp4 ': {' width ': 852, ' height ': 478},
    ' 480p_1mbps.mp4 ': {' width ': 852, ' height ': 478},
}
def _real_extract ( self , url ) :
def _real_extract ( self , url ) :
video_id = self . _match_id ( url )
video_id = self . _match_id ( url )
config_doc = self . _download_xml (
' http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx?site=16 ' ,
video_id , ' Downloading config ' )
media = self . _download_json (
' https://web-api-us.crackle.com/Service.svc/details/media/ %s / %s '
% ( video_id , self . _GEO_COUNTRIES [ 0 ] ) , video_id , query = {
' disableProtocols ' : ' true ' ,
' format ' : ' json '
} )
title = media [ ' Title ' ]
formats = [ ]
for e in media [ ' MediaURLs ' ] :
if e . get ( ' UseDRM ' ) is True :
continue
format_url = e . get ( ' Path ' )
if not format_url or not isinstance ( format_url , compat_str ) :
continue
ext = determine_ext ( format_url )
if ext == ' m3u8 ' :
formats . extend ( self . _extract_m3u8_formats (
format_url , video_id , ' mp4 ' , entry_protocol = ' m3u8_native ' ,
m3u8_id = ' hls ' , fatal = False ) )
elif ext == ' mpd ' :
formats . extend ( self . _extract_mpd_formats (
format_url , video_id , mpd_id = ' dash ' , fatal = False ) )
self . _sort_formats ( formats )
item = self . _download_xml (
' http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm= %s ' % video_id ,
video_id , headers = self . geo_verification_headers ( ) ) . find ( ' i ' )
title = item . attrib [ ' t ' ]
description = media . get ( ' Description ' )
duration = int_or_none ( media . get (
' DurationInSeconds ' ) ) or parse_duration ( media . get ( ' Duration ' ) )
view_count = int_or_none ( media . get ( ' CountViews ' ) )
average_rating = float_or_none ( media . get ( ' UserRating ' ) )
age_limit = parse_age_limit ( media . get ( ' Rating ' ) )
genre = media . get ( ' Genre ' )
release_year = int_or_none ( media . get ( ' ReleaseYear ' ) )
creator = media . get ( ' Directors ' )
artist = media . get ( ' Cast ' )
if media . get ( ' MediaTypeDisplayValue ' ) == ' Full Episode ' :
series = media . get ( ' ShowName ' )
episode = title
season_number = int_or_none ( media . get ( ' Season ' ) )
episode_number = int_or_none ( media . get ( ' Episode ' ) )
else :
series = episode = season_number = episode_number = None
subtitles = { }
subtitles = { }
formats = self . _extract_m3u8_formats (
' http://content.uplynk.com/ext/ %s / %s .m3u8 ' % ( config_doc . attrib [ ' strUplynkOwnerId ' ] , video_id ) ,
video_id , ' mp4 ' , m3u8_id = ' hls ' , fatal = None )
cc_files = media . get ( ' ClosedCaptionFiles ' )
if isinstance ( cc_files , list ) :
for cc_file in cc_files :
if not isinstance ( cc_file , dict ) :
continue
cc_url = cc_file . get ( ' Path ' )
if not cc_url or not isinstance ( cc_url , compat_str ) :
continue
lang = cc_file . get ( ' Locale ' ) or ' en '
subtitles . setdefault ( lang , [ ] ) . append ( { ' url ' : cc_url } )
thumbnails = [ ]
thumbnails = [ ]
path = item . attrib . get ( ' p ' )
if path :
for width , height in self . _THUMBNAIL_RES :
res = ' %d x %d ' % ( width , height )
images = media . get ( ' Images ' )
if isinstance ( images , list ) :
for image_key , image_url in images . items ( ) :
mobj = re . search ( r ' Img_( \ d+)[xX]( \ d+) ' , image_key )
if not mobj :
continue
thumbnails . append ( {
thumbnails . append ( {
' id ' : res ,
' url ' : ' http://images-us-am.crackle.com/ %s tnl_ %s .jpg ' % ( path , res ) ,
' width ' : width ,
' height ' : height ,
' resolution ' : res ,
} )
http_base_url = ' http://ahttp.crackle.com/ ' + path
for mfs_path , mfs_info in self . _MEDIA_FILE_SLOTS . items ( ) :
formats . append ( {
' url ' : http_base_url + mfs_path ,
' format_id ' : ' http- ' + mfs_path . split ( ' . ' ) [ 0 ] ,
' width ' : mfs_info [ ' width ' ] ,
' height ' : mfs_info [ ' height ' ] ,
' url ' : image_url ,
' width ' : int ( mobj . group ( 1 ) ) ,
' height ' : int ( mobj . group ( 2 ) ) ,
} )
} )
for cc in item . findall ( ' cc ' ) :
locale = cc . attrib . get ( ' l ' )
v = cc . attrib . get ( ' v ' )
if locale and v :
if locale not in subtitles :
subtitles [ locale ] = [ ]
for url_ext , ext in ( ( ' vtt ' , ' vtt ' ) , ( ' xml ' , ' tt ' ) ) :
subtitles . setdefault ( locale , [ ] ) . append ( {
' url ' : ' %s / %s %s _ %s . %s ' % ( config_doc . attrib [ ' strSubtitleServer ' ] , path , locale , v , url_ext ) ,
' ext ' : ext ,
} )
self . _sort_formats ( formats , ( ' width ' , ' height ' , ' tbr ' , ' format_id ' ) )
return {
return {
' id ' : video_id ,
' id ' : video_id ,
' title ' : title ,
' title ' : title ,
' description ' : item . attrib . get ( ' d ' ) ,
' duration ' : int ( item . attrib . get ( ' r ' ) , 16 ) / 1000 if item . attrib . get ( ' r ' ) else None ,
' series ' : item . attrib . get ( ' sn ' ) ,
' season_number ' : int_or_none ( item . attrib . get ( ' se ' ) ) ,
' episode_number ' : int_or_none ( item . attrib . get ( ' ep ' ) ) ,
' description ' : description ,
' duration ' : duration ,
' view_count ' : view_count ,
' average_rating ' : average_rating ,
' age_limit ' : age_limit ,
' genre ' : genre ,
' creator ' : creator ,
' artist ' : artist ,
' release_year ' : release_year ,
' series ' : series ,
' episode ' : episode ,
' season_number ' : season_number ,
' episode_number ' : episode_number ,
' thumbnails ' : thumbnails ,
' thumbnails ' : thumbnails ,
' subtitles ' : subtitles ,
' subtitles ' : subtitles ,
' formats ' : formats ,
' formats ' : formats ,