@ -1,15 +1,23 @@
# coding: utf-8
import collections
import errno
import io
import itertools
import json
import netrc
import os.path
import re
import socket
import itertools
import string
import struct
import traceback
import xml.etree.ElementTree
import zlib
from .common import InfoExtractor , SearchInfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..utils import (
compat_chr ,
compat_http_client ,
compat_parse_qs ,
compat_urllib_error ,
@ -23,6 +31,7 @@ from ..utils import (
unescapeHTML ,
unified_strdate ,
orderedSet ,
write_json_file ,
)
class YoutubeBaseInfoExtractor ( InfoExtractor ) :
@ -139,7 +148,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
(
( ? : https ? : / / ) ? # http(s):// (optional)
( ? : ( ? : ( ? : ( ? : \w + \. ) ? youtube ( ? : - nocookie ) ? \. com / |
tube \. majestyc \. net / ) # the various hostnames, with wildcard subdomains
tube \. majestyc \. net / |
youtube \. googleapis \. com / ) # the various hostnames, with wildcard subdomains
( ? : . * ? \#/)? # handle anchor (#/) redirect urls
( ? : # the various things that can precede the ID:
( ? : ( ? : v | embed | e ) / ) # v/ or embed/ or e/
@ -351,7 +361,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
u " info_dict " : {
u " upload_date " : u " 20120506 " ,
u " title " : u " Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO] " ,
u " description " : u " md5:3e2666e0a55044490499ea45fe9037b 7 " ,
u " description " : u " md5:5b292926389560516e384ac437c0ec0 7 " ,
u " uploader " : u " Icona Pop " ,
u " uploader_id " : u " IconaPop "
}
@ -368,21 +378,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
u " uploader_id " : u " justintimberlakeVEVO "
}
} ,
{
u ' url ' : u ' https://www.youtube.com/watch?v=TGi3HqYrWHE ' ,
u ' file ' : u ' TGi3HqYrWHE.mp4 ' ,
u ' note ' : u ' m3u8 video ' ,
u ' info_dict ' : {
u ' title ' : u ' Triathlon - Men - London 2012 Olympic Games ' ,
u ' description ' : u ' - Men - TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games ' ,
u ' uploader ' : u ' olympic ' ,
u ' upload_date ' : u ' 20120807 ' ,
u ' uploader_id ' : u ' olympic ' ,
} ,
u ' params ' : {
u ' skip_download ' : True ,
} ,
} ,
]
@ -392,6 +387,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
if YoutubePlaylistIE . suitable ( url ) : return False
return re . match ( cls . _VALID_URL , url , re . VERBOSE ) is not None
def __init__(self, *args, **kwargs):
    """Initialise the extractor with an empty player cache.

    All positional and keyword arguments are forwarded unchanged to the
    parent class constructor.
    """
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Starts out empty; _decrypt_signature stores one entry per player URL.
    self._player_cache = {}
def report_video_webpage_download ( self , video_id ) :
""" Report the attempt to download the video webpage.

The message is written with self.to_screen and starts with video_id.
"""
self . to_screen ( u ' %s : Downloading video webpage ' % video_id )
@ -412,11 +411,664 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
""" Indicate the download will use the RTMP protocol. """
self . to_screen ( u ' RTMP download detected ' )
def _decrypt_signature ( self , s ) :
def _extract_signature_function ( self , video_id , player_url , slen ) :
id_m = re . match ( r ' .*-(?P<id>[a-zA-Z0-9_-]+) \ .(?P<ext>[a-z]+)$ ' ,
player_url )
player_type = id_m . group ( ' ext ' )
player_id = id_m . group ( ' id ' )
# Read from filesystem cache
func_id = ' %s _ %s _ %d ' % ( player_type , player_id , slen )
assert os . path . basename ( func_id ) == func_id
cache_dir = self . _downloader . params . get ( ' cachedir ' ,
u ' ~/.youtube-dl/cache ' )
cache_enabled = cache_dir is not None
if cache_enabled :
cache_fn = os . path . join ( os . path . expanduser ( cache_dir ) ,
u ' youtube-sigfuncs ' ,
func_id + ' .json ' )
try :
with io . open ( cache_fn , ' r ' , encoding = ' utf-8 ' ) as cachef :
cache_spec = json . load ( cachef )
return lambda s : u ' ' . join ( s [ i ] for i in cache_spec )
except IOError :
pass # No cache available
if player_type == ' js ' :
code = self . _download_webpage (
player_url , video_id ,
note = u ' Downloading %s player %s ' % ( player_type , player_id ) ,
errnote = u ' Download of %s failed ' % player_url )
res = self . _parse_sig_js ( code )
elif player_type == ' swf ' :
urlh = self . _request_webpage (
player_url , video_id ,
note = u ' Downloading %s player %s ' % ( player_type , player_id ) ,
errnote = u ' Download of %s failed ' % player_url )
code = urlh . read ( )
res = self . _parse_sig_swf ( code )
else :
assert False , ' Invalid player type %r ' % player_type
if cache_enabled :
try :
test_string = u ' ' . join ( map ( compat_chr , range ( slen ) ) )
cache_res = res ( test_string )
cache_spec = [ ord ( c ) for c in cache_res ]
try :
os . makedirs ( os . path . dirname ( cache_fn ) )
except OSError as ose :
if ose . errno != errno . EEXIST :
raise
write_json_file ( cache_spec , cache_fn )
except Exception :
tb = traceback . format_exc ( )
self . _downloader . report_warning (
u ' Writing cache to %r failed: %s ' % ( cache_fn , tb ) )
return res
def _print_sig_code(self, func, slen):
    """Print Python source equivalent to ``func`` for length ``slen``.

    ``func`` is applied to a probe string whose i-th character has code
    point i, so the code points of the result reveal which input index
    ends up at each output position.  Runs of consecutive indices
    (ascending or descending) are rendered as slices, everything else as
    single subscripts, and the resulting snippet is written with
    ``self.to_screen``.
    """
    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            starts = u'' if start == 0 else str(start)
            # A descending run that reaches index 0 has no expressible
            # stop value, so the stop is left open.
            ends = (u':%d' % (end + step)) if end + step >= 0 else u':'
            steps = u'' if step == 1 else (u':%d' % step)
            return u's[%s%s%s]' % (starts, ends, steps)

        if not idxs:
            # Nothing to emit; previously this raised UnboundLocalError.
            return

        step = None
        start = None  # set as soon as step is set
        # Pre-bind i so a single-element spec still emits its subscript
        # after the (then empty) loop.
        i = idxs[0]
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                # The run ended at prev; i starts a fresh element.
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                step = i - prev
                start = prev
                continue
            else:
                yield u's[%d]' % prev
        if step is None:
            yield u's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = u''.join(map(compat_chr, range(slen)))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = u' + '.join(gen_sig_code(cache_spec))
    code = u'if len(s) == %d:\n    return %s\n' % (slen, expr_code)
    self.to_screen(u'Extracted signature function:\n' + code)
def _parse_sig_js ( self , jscode ) :
funcname = self . _search_regex (
r ' signature=([a-zA-Z]+) ' , jscode ,
u ' Initial JS player signature function name ' )
functions = { }
def argidx ( varname ) :
return string . lowercase . index ( varname )
def interpret_statement ( stmt , local_vars , allow_recursion = 20 ) :
if allow_recursion < 0 :
raise ExtractorError ( u ' Recursion limit reached ' )
if stmt . startswith ( u ' var ' ) :
stmt = stmt [ len ( u ' var ' ) : ]
ass_m = re . match ( r ' ^(?P<out>[a-z]+)(?: \ [(?P<index>[^ \ ]]+) \ ])? ' +
r ' =(?P<expr>.*)$ ' , stmt )
if ass_m :
if ass_m . groupdict ( ) . get ( ' index ' ) :
def assign ( val ) :
lvar = local_vars [ ass_m . group ( ' out ' ) ]
idx = interpret_expression ( ass_m . group ( ' index ' ) ,
local_vars , allow_recursion )
assert isinstance ( idx , int )
lvar [ idx ] = val
return val
expr = ass_m . group ( ' expr ' )
else :
def assign ( val ) :
local_vars [ ass_m . group ( ' out ' ) ] = val
return val
expr = ass_m . group ( ' expr ' )
elif stmt . startswith ( u ' return ' ) :
assign = lambda v : v
expr = stmt [ len ( u ' return ' ) : ]
else :
raise ExtractorError (
u ' Cannot determine left side of statement in %r ' % stmt )
v = interpret_expression ( expr , local_vars , allow_recursion )
return assign ( v )
def interpret_expression ( expr , local_vars , allow_recursion ) :
if expr . isdigit ( ) :
return int ( expr )
if expr . isalpha ( ) :
return local_vars [ expr ]
m = re . match ( r ' ^(?P<in>[a-z]+) \ .(?P<member>.*)$ ' , expr )
if m :
member = m . group ( ' member ' )
val = local_vars [ m . group ( ' in ' ) ]
if member == ' split( " " ) ' :
return list ( val )
if member == ' join( " " ) ' :
return u ' ' . join ( val )
if member == ' length ' :
return len ( val )
if member == ' reverse() ' :
return val [ : : - 1 ]
slice_m = re . match ( r ' slice \ ((?P<idx>.*) \ ) ' , member )
if slice_m :
idx = interpret_expression (
slice_m . group ( ' idx ' ) , local_vars , allow_recursion - 1 )
return val [ idx : ]
m = re . match (
r ' ^(?P<in>[a-z]+) \ [(?P<idx>.+) \ ]$ ' , expr )
if m :
val = local_vars [ m . group ( ' in ' ) ]
idx = interpret_expression ( m . group ( ' idx ' ) , local_vars ,
allow_recursion - 1 )
return val [ idx ]
m = re . match ( r ' ^(?P<a>.+?)(?P<op>[ % ])(?P<b>.+?)$ ' , expr )
if m :
a = interpret_expression ( m . group ( ' a ' ) ,
local_vars , allow_recursion )
b = interpret_expression ( m . group ( ' b ' ) ,
local_vars , allow_recursion )
return a % b
m = re . match (
r ' ^(?P<func>[a-zA-Z]+) \ ((?P<args>[a-z0-9,]+) \ )$ ' , expr )
if m :
fname = m . group ( ' func ' )
if fname not in functions :
functions [ fname ] = extract_function ( fname )
argvals = [ int ( v ) if v . isdigit ( ) else local_vars [ v ]
for v in m . group ( ' args ' ) . split ( ' , ' ) ]
return functions [ fname ] ( argvals )
raise ExtractorError ( u ' Unsupported JS expression %r ' % expr )
def extract_function ( funcname ) :
func_m = re . search (
r ' function ' + re . escape ( funcname ) +
r ' \ ((?P<args>[a-z,]+) \ ){(?P<code>[^}]+)} ' ,
jscode )
argnames = func_m . group ( ' args ' ) . split ( ' , ' )
def resf ( args ) :
local_vars = dict ( zip ( argnames , args ) )
for stmt in func_m . group ( ' code ' ) . split ( ' ; ' ) :
res = interpret_statement ( stmt , local_vars )
return res
return resf
initial_function = extract_function ( funcname )
return lambda s : initial_function ( [ s ] )
def _parse_sig_swf(self, file_contents):
    """Build a Python callable from the Flash player's signature method.

    ``file_contents`` is the raw (zlib-compressed, ``CWS``) SWF file.  The
    DoABC tag (code 82) is located, its ABC (AVM2 bytecode) block is
    walked far enough to collect the method bodies of the
    ``SignatureDecipher`` class, and a small stack-machine interpreter
    supporting only the opcodes listed below runs its ``decipher`` method.

    Returns a function taking the scrambled signature string.  Raises
    ExtractorError when the file is not an SWF or the expected tag, class
    or method is missing, and NotImplementedError for unsupported
    compression, properties or opcodes.
    """
    if file_contents[1:3] != b'WS':
        raise ExtractorError(
            u'Not an SWF file; header is %r' % file_contents[:3])
    if file_contents[:1] == b'C':
        # Everything after the 8-byte file header is zlib-compressed.
        content = zlib.decompress(file_contents[8:])
    else:
        raise NotImplementedError(u'Unsupported compression format %r' %
                                  file_contents[:1])

    def extract_tags(content):
        # The tag stream does not start at offset 0: it is preceded by
        # the FrameSize RECT (5 bits giving the field width, then four
        # fields of that width, padded to a byte), FrameRate (2 bytes)
        # and FrameCount (2 bytes).
        framesize_nbits = struct.unpack('!B', content[:1])[0] >> 3
        framesize_len = (5 + 4 * framesize_nbits + 7) // 8
        pos = framesize_len + 2 + 2
        while pos < len(content):
            header16 = struct.unpack('<H', content[pos:pos + 2])[0]
            pos += 2
            tag_code = header16 >> 6
            tag_len = header16 & 0x3f
            if tag_len == 0x3f:
                # Long tag: the real length follows as a 32-bit integer.
                tag_len = struct.unpack('<I', content[pos:pos + 4])[0]
                pos += 4
            assert pos + tag_len <= len(content)
            yield (tag_code, content[pos:pos + tag_len])
            pos += tag_len

    code_tag = next((tag
                     for tag_code, tag in extract_tags(content)
                     if tag_code == 82), None)
    if code_tag is None:
        raise ExtractorError(u'No DoABC tag found in SWF file')
    # Skip the 4-byte flags and the NUL-terminated name of the DoABC tag.
    p = code_tag.index(b'\0', 4) + 1
    code_reader = io.BytesIO(code_tag[p:])

    # Parse ABC (AVM2 ByteCode)
    def read_int(reader=None):
        # Variable-length integer: 7 payload bits per byte, high bit set
        # while more bytes follow, at most 5 bytes.
        if reader is None:
            reader = code_reader
        res = 0
        shift = 0
        for _ in range(5):
            buf = reader.read(1)
            assert len(buf) == 1
            b = struct.unpack('<B', buf)[0]
            res = res | ((b & 0x7f) << shift)
            if b & 0x80 == 0:
                break
            shift += 7
        return res

    def u30(reader=None):
        res = read_int(reader)
        assert res & 0xf0000000 == 0
        return res
    u32 = read_int

    def s32(reader=None):
        v = read_int(reader)
        if v & 0x80000000 != 0:
            # Two's complement: reinterpret as a negative number.
            v = - ((v ^ 0xffffffff) + 1)
        return v

    def read_string(reader=None):
        if reader is None:
            reader = code_reader
        slen = u30(reader)
        resb = reader.read(slen)
        assert len(resb) == slen
        return resb.decode('utf-8')

    def read_bytes(count, reader=None):
        if reader is None:
            reader = code_reader
        resb = reader.read(count)
        assert len(resb) == count
        return resb

    def read_byte(reader=None):
        resb = read_bytes(1, reader=reader)
        res = struct.unpack('<B', resb)[0]
        return res

    # minor_version + major_version
    read_bytes(2 + 2)

    # Constant pool
    # Every pool stores count - 1 entries (entry 0 is implicit), hence
    # the ranges starting at 1.
    int_count = u30()
    for _c in range(1, int_count):
        s32()
    uint_count = u30()
    for _c in range(1, uint_count):
        u32()
    double_count = u30()
    # A count of 0 also means "no entries"; never hand a negative size to
    # read(), which would consume the rest of the stream.
    read_bytes(max(0, double_count - 1) * 8)
    string_count = u30()
    constant_strings = [u'']
    for _c in range(1, string_count):
        s = read_string()
        constant_strings.append(s)
    namespace_count = u30()
    for _c in range(1, namespace_count):
        read_bytes(1)  # kind
        u30()  # name
    ns_set_count = u30()
    for _c in range(1, ns_set_count):
        count = u30()
        for _c2 in range(count):
            u30()
    multiname_count = u30()
    # Number of u30 fields following the kind byte, per multiname kind.
    MULTINAME_SIZES = {
        0x07: 2,  # QName
        0x0d: 2,  # QNameA
        0x0f: 1,  # RTQName
        0x10: 1,  # RTQNameA
        0x11: 0,  # RTQNameL
        0x12: 0,  # RTQNameLA
        0x09: 2,  # Multiname
        0x0e: 2,  # MultinameA
        0x1b: 1,  # MultinameL
        0x1c: 1,  # MultinameLA
    }
    multinames = [u'']
    for _c in range(1, multiname_count):
        kind = u30()
        assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
        if kind == 0x07:
            u30()  # namespace_idx
            name_idx = u30()
            multinames.append(constant_strings[name_idx])
        else:
            # Only QNames are resolved; other kinds get a placeholder so
            # that indices stay aligned.
            multinames.append('[MULTINAME kind: %d]' % kind)
            for _c2 in range(MULTINAME_SIZES[kind]):
                u30()

    # Methods
    method_count = u30()
    MethodInfo = collections.namedtuple(
        'MethodInfo',
        ['NEED_ARGUMENTS', 'NEED_REST'])
    method_infos = []
    for method_id in range(method_count):
        param_count = u30()
        u30()  # return type
        for _ in range(param_count):
            u30()  # param type
        u30()  # name index (always 0 for youtube)
        flags = read_byte()
        if flags & 0x08 != 0:
            # Options present
            option_count = u30()
            for c in range(option_count):
                u30()  # val
                read_bytes(1)  # kind
        if flags & 0x80 != 0:
            # Param names present
            for _ in range(param_count):
                u30()  # param name
        mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
        method_infos.append(mi)

    # Metadata
    metadata_count = u30()
    for _c in range(metadata_count):
        u30()  # name
        item_count = u30()
        for _c2 in range(item_count):
            u30()  # key
            u30()  # value

    def parse_traits_info():
        # Consume one traits_info record and return {name: method index}
        # for the method-like traits it declares.
        trait_name_idx = u30()
        kind_full = read_byte()
        kind = kind_full & 0x0f
        attrs = kind_full >> 4
        methods = {}
        if kind in [0x00, 0x06]:  # Slot or Const
            u30()  # Slot id
            u30()  # type_name_idx
            vindex = u30()
            if vindex != 0:
                read_byte()  # vkind
        elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
            u30()  # disp_id
            method_idx = u30()
            methods[multinames[trait_name_idx]] = method_idx
        elif kind == 0x04:  # Class
            u30()  # slot_id
            u30()  # classi
        elif kind == 0x05:  # Function
            u30()  # slot_id
            function_idx = u30()
            # Same name -> index direction as for methods above; callers
            # invert the whole mapping themselves.
            methods[multinames[trait_name_idx]] = function_idx
        else:
            raise ExtractorError(u'Unsupported trait kind %d' % kind)

        if attrs & 0x4 != 0:  # Metadata present
            metadata_count = u30()
            for _c3 in range(metadata_count):
                u30()  # metadata index

        return methods

    # Classes
    TARGET_CLASSNAME = u'SignatureDecipher'
    try:
        searched_idx = multinames.index(TARGET_CLASSNAME)
    except ValueError:
        raise ExtractorError(u'Target class %r not found' %
                             TARGET_CLASSNAME)
    searched_class_id = None
    class_count = u30()
    for class_id in range(class_count):
        name_idx = u30()
        if name_idx == searched_idx:
            # We found the class we're looking for!
            searched_class_id = class_id
        u30()  # super_name idx
        flags = read_byte()
        if flags & 0x08 != 0:  # Protected namespace is present
            u30()  # protected_ns_idx
        intrf_count = u30()
        for _c2 in range(intrf_count):
            u30()
        u30()  # iinit
        trait_count = u30()
        for _c2 in range(trait_count):
            parse_traits_info()

    if searched_class_id is None:
        raise ExtractorError(u'Target class %r not found' %
                             TARGET_CLASSNAME)

    # Second pass over the classes (class_info records): collect the
    # method index -> name mapping of the target class.
    method_idxs = {}
    for class_id in range(class_count):
        u30()  # cinit
        trait_count = u30()
        for _c2 in range(trait_count):
            trait_methods = parse_traits_info()
            if class_id == searched_class_id:
                method_idxs.update(dict(
                    (idx, name)
                    for name, idx in trait_methods.items()))

    # Scripts
    script_count = u30()
    for _c in range(script_count):
        u30()  # init
        trait_count = u30()
        for _c2 in range(trait_count):
            parse_traits_info()

    # Method bodies
    method_body_count = u30()
    Method = collections.namedtuple('Method', ['code', 'local_count'])
    methods = {}
    for _c in range(method_body_count):
        method_idx = u30()
        u30()  # max_stack
        local_count = u30()
        u30()  # init_scope_depth
        u30()  # max_scope_depth
        code_length = u30()
        code = read_bytes(code_length)
        if method_idx in method_idxs:
            m = Method(code, local_count)
            methods[method_idxs[method_idx]] = m
        exception_count = u30()
        for _c2 in range(exception_count):
            u30()  # from
            u30()  # to
            u30()  # target
            u30()  # exc_type
            u30()  # var_name
        trait_count = u30()
        for _c2 in range(trait_count):
            parse_traits_info()

    # The whole ABC block must have been consumed and every method of the
    # target class must have a body.
    assert p + code_reader.tell() == len(code_tag)
    assert len(methods) == len(method_idxs)

    method_pyfunctions = {}

    def extract_function(func_name):
        if func_name in method_pyfunctions:
            return method_pyfunctions[func_name]
        if func_name not in methods:
            raise ExtractorError(u'Cannot find function %r' % func_name)
        m = methods[func_name]

        def resfunc(args):
            # Register 0 is "this", followed by the arguments and then
            # the method's locals.
            registers = ['(this)'] + list(args) + [None] * m.local_count
            stack = []
            coder = io.BytesIO(m.code)
            while True:
                opcode = struct.unpack('!B', coder.read(1))[0]
                if opcode == 36:  # pushbyte
                    v = struct.unpack('!B', coder.read(1))[0]
                    stack.append(v)
                elif opcode == 44:  # pushstring
                    idx = u30(coder)
                    stack.append(constant_strings[idx])
                elif opcode == 48:  # pushscope
                    # We don't implement the scope register, so we'll just
                    # ignore the popped value
                    stack.pop()
                elif opcode == 70:  # callproperty
                    index = u30(coder)
                    mname = multinames[index]
                    arg_count = u30(coder)
                    # Arguments were pushed left to right, so popping
                    # yields them in reverse.
                    args = list(reversed(
                        [stack.pop() for _ in range(arg_count)]))
                    obj = stack.pop()
                    if mname == u'split':
                        assert len(args) == 1
                        assert isinstance(args[0], compat_str)
                        assert isinstance(obj, compat_str)
                        if args[0] == u'':
                            res = list(obj)
                        else:
                            res = obj.split(args[0])
                        stack.append(res)
                    elif mname == u'slice':
                        assert len(args) == 1
                        assert isinstance(args[0], int)
                        assert isinstance(obj, list)
                        res = obj[args[0]:]
                        stack.append(res)
                    elif mname == u'join':
                        assert len(args) == 1
                        assert isinstance(args[0], compat_str)
                        assert isinstance(obj, list)
                        res = args[0].join(obj)
                        stack.append(res)
                    elif mname in method_pyfunctions:
                        stack.append(method_pyfunctions[mname](args))
                    else:
                        raise NotImplementedError(
                            u'Unsupported property %r on %r'
                            % (mname, obj))
                elif opcode == 72:  # returnvalue
                    res = stack.pop()
                    return res
                elif opcode == 79:  # callpropvoid
                    index = u30(coder)
                    mname = multinames[index]
                    arg_count = u30(coder)
                    args = list(reversed(
                        [stack.pop() for _ in range(arg_count)]))
                    obj = stack.pop()
                    if mname == u'reverse':
                        assert isinstance(obj, list)
                        obj.reverse()
                    else:
                        raise NotImplementedError(
                            u'Unsupported (void) property %r on %r'
                            % (mname, obj))
                elif opcode == 93:  # findpropstrict
                    index = u30(coder)
                    mname = multinames[index]
                    # Translating the callee here also registers it in
                    # method_pyfunctions for the callproperty that follows.
                    res = extract_function(mname)
                    stack.append(res)
                elif opcode == 97:  # setproperty
                    index = u30(coder)
                    value = stack.pop()
                    idx = stack.pop()
                    obj = stack.pop()
                    assert isinstance(obj, list)
                    assert isinstance(idx, int)
                    obj[idx] = value
                elif opcode == 98:  # getlocal
                    index = u30(coder)
                    stack.append(registers[index])
                elif opcode == 99:  # setlocal
                    index = u30(coder)
                    value = stack.pop()
                    registers[index] = value
                elif opcode == 102:  # getproperty
                    index = u30(coder)
                    pname = multinames[index]
                    if pname == u'length':
                        obj = stack.pop()
                        assert isinstance(obj, list)
                        stack.append(len(obj))
                    else:  # Assume attribute access
                        idx = stack.pop()
                        assert isinstance(idx, int)
                        obj = stack.pop()
                        assert isinstance(obj, list)
                        stack.append(obj[idx])
                elif opcode == 128:  # coerce
                    u30(coder)
                elif opcode == 133:  # coerce_s
                    assert isinstance(stack[-1], (type(None), compat_str))
                elif opcode == 164:  # modulo
                    value2 = stack.pop()
                    value1 = stack.pop()
                    res = value1 % value2
                    stack.append(res)
                elif opcode == 208:  # getlocal_0
                    stack.append(registers[0])
                elif opcode == 209:  # getlocal_1
                    stack.append(registers[1])
                elif opcode == 210:  # getlocal_2
                    stack.append(registers[2])
                elif opcode == 211:  # getlocal_3
                    stack.append(registers[3])
                elif opcode == 214:  # setlocal_2
                    registers[2] = stack.pop()
                elif opcode == 215:  # setlocal_3
                    registers[3] = stack.pop()
                else:
                    raise NotImplementedError(
                        u'Unsupported opcode %d' % opcode)

        method_pyfunctions[func_name] = resfunc
        return resfunc

    initial_function = extract_function(u'decipher')
    return lambda s: initial_function([s])
def _decrypt_signature ( self , s , video_id , player_url , age_gate = False ) :
""" Turn the encrypted s field into a working signature.

First tries the signature function extracted from the player at
player_url (kept in self._player_cache); the result of
self._static_decrypt_signature is returned otherwise.
"""
# NOTE(review): indentation is not visible in this view, so how far the
# len(s) == 92 guard below extends cannot be confirmed from here.
if len ( s ) == 92 :
if player_url is not None :
try :
# Extract the function only once per player URL and reuse it afterwards.
if player_url not in self . _player_cache :
func = self . _extract_signature_function (
video_id , player_url , len ( s )
)
self . _player_cache [ player_url ] = func
func = self . _player_cache [ player_url ]
# Optionally print the extracted function as Python code.
if self . _downloader . params . get ( ' youtube_print_sig_code ' ) :
self . _print_sig_code ( func , len ( s ) )
return func ( s )
except Exception :
# Any failure of the automatic extraction is only reported as a warning
# (with the full traceback); it is not re-raised.
tb = traceback . format_exc ( )
self . _downloader . report_warning (
u ' Automatic signature extraction failed: ' + tb )
self . _downloader . report_warning (
u ' Warning: Falling back to static signature algorithm ' )
# Hard-coded, length-specific algorithms.
return self . _static_decrypt_signature (
s , video_id , player_url , age_gate )
def _static_decrypt_signature ( self , s , video_id , player_url , age_gate ) :
if age_gate :
# The videos with age protection use another player, so the
# algorithms can be different.
if len ( s ) == 86 :
return s [ 2 : 63 ] + s [ 82 ] + s [ 64 : 82 ] + s [ 63 ]
if len ( s ) == 93 :
return s [ 86 : 29 : - 1 ] + s [ 88 ] + s [ 28 : 5 : - 1 ]
elif len ( s ) == 92 :
return s [ 25 ] + s [ 3 : 25 ] + s [ 0 ] + s [ 26 : 42 ] + s [ 79 ] + s [ 43 : 79 ] + s [ 91 ] + s [ 80 : 83 ]
elif len ( s ) == 91 :
return s [ 84 : 27 : - 1 ] + s [ 86 ] + s [ 26 : 5 : - 1 ]
elif len ( s ) == 90 :
return s [ 25 ] + s [ 3 : 25 ] + s [ 2 ] + s [ 26 : 40 ] + s [ 77 ] + s [ 41 : 77 ] + s [ 89 ] + s [ 78 : 81 ]
elif len ( s ) == 89 :
@ -426,13 +1078,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
elif len ( s ) == 87 :
return s [ 6 : 27 ] + s [ 4 ] + s [ 28 : 39 ] + s [ 27 ] + s [ 40 : 59 ] + s [ 2 ] + s [ 60 : ]
elif len ( s ) == 86 :
return s [ 5 : 34 ] + s [ 0 ] + s [ 35 : 38 ] + s [ 3 ] + s [ 39 : 45 ] + s [ 38 ] + s [ 46 : 53 ] + s [ 73 ] + s [ 54 : 73 ] + s [ 85 ] + s [ 74 : 85 ] + s [ 53 ]
return s [ 80 : 72 : - 1 ] + s [ 16 ] + s [ 71 : 39 : - 1 ] + s [ 72 ] + s [ 38 : 16 : - 1 ] + s [ 82 ] + s [ 15 : : - 1 ]
elif len ( s ) == 85 :
return s [ 40 ] + s [ 82 : 4 3: - 1 ] + s [ 22 ] + s [ 42 : 40 : - 1 ] + s [ 83 ] + s [ 39 : 22 : - 1 ] + s [ 0 ] + s [ 21 : 2 : - 1 ]
return s [ 3 : 11 ] + s [ 0 ] + s [ 12 : 55 ] + s [ 84 ] + s [ 56 : 84 ]
elif len ( s ) == 84 :
return s [ 81 : 36 : - 1 ] + s [ 0 ] + s [ 35 : 2 : - 1 ]
return s [ 78 : 70 : - 1 ] + s [ 14 ] + s [ 69 : 37 : - 1 ] + s [ 7 0] + s [ 36 : 14 : - 1 ] + s [ 80 ] + s [ : 14 ] [ : : - 1 ]
elif len ( s ) == 83 :
return s [ 81 : 64 : - 1 ] + s [ 82 ] + s [ 63 : 52 : - 1 ] + s [ 45 ] + s [ 51 : 45 : - 1 ] + s [ 1 ] + s [ 44 : 1 : - 1 ] + s [ 0 ]
return s [ 80 : 63 : - 1 ] + s [ 0 ] + s [ 62 : 0 : - 1 ] + s [ 63 ]
elif len ( s ) == 82 :
return s [ 80 : 73 : - 1 ] + s [ 81 ] + s [ 72 : 54 : - 1 ] + s [ 2 ] + s [ 53 : 43 : - 1 ] + s [ 0 ] + s [ 42 : 2 : - 1 ] + s [ 43 ] + s [ 1 ] + s [ 54 ]
elif len ( s ) == 81 :
@ -445,15 +1097,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
else :
raise ExtractorError ( u ' Unable to decrypt signature, key length %d not supported; retrying might work ' % ( len ( s ) ) )
def _decrypt_signature_age_gate ( self , s ) :
# The videos with age protection use another player, so the algorithms
# can be different.
if len ( s ) == 86 :
return s [ 2 : 63 ] + s [ 82 ] + s [ 64 : 82 ] + s [ 63 ]
else :
# Fallback to the other algortihms
return self . _decrypt_signature ( s )
def _get_available_subtitles ( self , video_id ) :
try :
sub_list = self . _download_webpage (
@ -626,7 +1269,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
video_webpage = video_webpage_bytes . decode ( ' utf-8 ' , ' ignore ' )
# Attempt to extract SWF player URL
mobj = re . search ( r ' swfConfig.*? " (http: \\ / \\ /.*?watch.*?-.*? \ .swf) " ' , video_webpage )
mobj = re . search ( r ' swfConfig.*? " (https? : \\ / \\ /.*?watch.*?-.*? \ .swf) " ' , video_webpage )
if mobj is not None :
player_url = re . sub ( r ' \\ (.) ' , r ' \ 1 ' , mobj . group ( 1 ) )
else :
@ -702,7 +1345,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
video_thumbnail = m_thumb . group ( 1 )
elif ' thumbnail_url ' not in video_info :
self . _downloader . report_warning ( u ' unable to extract video thumbnail ' )
video_thumbnail = ' '
video_thumbnail = None
else : # don't panic if we can't find it
video_thumbnail = compat_urllib_parse . unquote_plus ( video_info [ ' thumbnail_url ' ] [ 0 ] )
@ -779,24 +1422,34 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
if ' sig ' in url_data :
url + = ' &signature= ' + url_data [ ' sig ' ] [ 0 ]
elif ' s ' in url_data :
encrypted_sig = url_data [ ' s ' ] [ 0 ]
if self . _downloader . params . get ( ' verbose ' ) :
s = url_data [ ' s ' ] [ 0 ]
if age_gate :
player_version = self . _search_regex ( r ' ad3-(.+?) \ .swf ' ,
video_info [ ' ad3_module ' ] [ 0 ] if ' ad3_module ' in video_info else ' NOT FOUND ' ,
' flash player ' , fatal = False )
player = ' flash player %s ' % player_version
if player_url is None :
player_version = ' unknown '
else :
player_version = self . _search_regex (
r ' -(.+) \ .swf$ ' , player_url ,
u ' flash player ' , fatal = False )
player_desc = ' flash player %s ' % player_version
else :
player = u ' html5 player %s ' % self . _search_regex ( r ' html5player-(.+?) \ .js ' , video_webpage ,
player_version = self . _search_regex (
r ' html5player-(.+?) \ .js ' , video_webpage ,
' html5 player ' , fatal = False )
parts_sizes = u ' . ' . join ( compat_str ( len ( part ) ) for part in s . split ( ' . ' ) )
player_desc = u ' html5 player %s ' % player_version
parts_sizes = u ' . ' . join ( compat_str ( len ( part ) ) for part in encrypted_sig . split ( ' . ' ) )
self . to_screen ( u ' encrypted signature length %d ( %s ), itag %s , %s ' %
( len ( s ) , parts_sizes , url_data [ ' itag ' ] [ 0 ] , player ) )
encrypted_sig = url_data [ ' s ' ] [ 0 ]
if age_gate :
signature = self . _decrypt_signature_age_gate ( encrypted_sig )
else :
signature = self . _decrypt_signature ( encrypted_sig )
( len ( encrypted_sig ) , parts_sizes , url_data [ ' itag ' ] [ 0 ] , player_desc ) )
if not age_gate :
jsplayer_url_json = self . _search_regex (
r ' " assets " :.+? " js " : \ s*( " [^ " ]+ " ) ' ,
video_webpage , u ' JS player URL ' )
player_url = json . loads ( jsplayer_url_json )
signature = self . _decrypt_signature (
encrypted_sig , video_id , player_url , age_gate )
url + = ' &signature= ' + signature
if ' ratebypass ' not in url :
url + = ' &ratebypass=yes '
@ -812,7 +1465,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
return
else :
raise ExtractorError ( u ' no conn or url_encoded_fmt_stream_map information found in video info ' )
raise ExtractorError ( u ' no conn, hlsvp or url_encoded_fmt_stream_map information found in video info ' )
results = [ ]
for format_param , video_real_url in video_url_list :
@ -1007,6 +1660,9 @@ class YoutubeUserIE(InfoExtractor):
response = json . loads ( page )
except ValueError as err :
raise ExtractorError ( u ' Invalid JSON in API response: ' + compat_str ( err ) )
if ' entry ' not in response [ ' feed ' ] :
# Number of videos is a multiple of self._MAX_RESULTS
break
# Extract video identifiers
ids_in_page = [ ]