Browse Source

[youtube & jsinterp] Fix signature extraction (fixes #3255)

Some functions are defined now inside an object, the jsinterp will search its definition if the variable is not defined in the local namespace.
totalwebcasting
Jaime Marquínez Ferrándiz 10 years ago
parent
commit
ad25aee245
2 changed files with 43 additions and 3 deletions
  1. +6
    -0
      test/test_youtube_signature.py
  2. +37
    -3
      youtube_dl/jsinterp.py

+ 6
- 0
test/test_youtube_signature.py View File

@ -33,6 +33,12 @@ _TESTS = [
90,
u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
),
(
u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
u'js',
84,
u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
),
(
u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
u'js',


+ 37
- 3
youtube_dl/jsinterp.py View File

@ -11,6 +11,7 @@ class JSInterpreter(object):
def __init__(self, code):
self.code = code
self._functions = {}
self._objects = {}
def interpret_statement(self, stmt, local_vars, allow_recursion=20):
if allow_recursion < 0:
@ -55,7 +56,19 @@ class JSInterpreter(object):
m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
if m:
member = m.group('member')
val = local_vars[m.group('in')]
variable = m.group('in')
if variable not in local_vars:
if variable not in self._objects:
self._objects[variable] = self.extract_object(variable)
obj = self._objects[variable]
key, args = member.split('(', 1)
args = args.strip(')')
argvals = [int(v) if v.isdigit() else local_vars[v]
for v in args.split(',')]
return obj[key](argvals)
val = local_vars[variable]
if member == 'split("")':
return list(val)
if member == 'join("")':
@ -97,6 +110,25 @@ class JSInterpreter(object):
return self._functions[fname](argvals)
raise ExtractorError('Unsupported JS expression %r' % expr)
def extract_object(self, objname):
obj = {}
obj_m = re.search(
(r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) +
r'\s*(?P<fields>([a-zA-Z$]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' +
r'\}\s*;',
self.code)
fields = obj_m.group('fields')
# Currently, it only supports function definitions
fields_m = re.finditer(
r'(?P<key>[a-zA-Z$]+)\s*:\s*function'
r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
fields)
for f in fields_m:
argnames = f.group('args').split(',')
obj[f.group('key')] = self.build_function(argnames, f.group('code'))
return obj
def extract_function(self, funcname):
func_m = re.search(
(r'(?:function %s|[{;]%s\s*=\s*function)' % (
@ -107,10 +139,12 @@ class JSInterpreter(object):
raise ExtractorError('Could not find JS function %r' % funcname)
argnames = func_m.group('args').split(',')
return self.build_function(argnames, func_m.group('code'))
def build_function(self, argnames, code):
def resf(args):
local_vars = dict(zip(argnames, args))
for stmt in func_m.group('code').split(';'):
for stmt in code.split(';'):
res = self.interpret_statement(stmt, local_vars)
return res
return resf

Loading…
Cancel
Save