You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

116 lines
4.1 KiB

  1. from __future__ import unicode_literals
  2. import re
  3. from .utils import (
  4. ExtractorError,
  5. )
  6. class JSInterpreter(object):
  7. def __init__(self, code):
  8. self.code = code
  9. self._functions = {}
  10. def interpret_statement(self, stmt, local_vars, allow_recursion=20):
  11. if allow_recursion < 0:
  12. raise ExtractorError('Recursion limit reached')
  13. if stmt.startswith('var '):
  14. stmt = stmt[len('var '):]
  15. ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
  16. r'=(?P<expr>.*)$', stmt)
  17. if ass_m:
  18. if ass_m.groupdict().get('index'):
  19. def assign(val):
  20. lvar = local_vars[ass_m.group('out')]
  21. idx = self.interpret_expression(
  22. ass_m.group('index'), local_vars, allow_recursion)
  23. assert isinstance(idx, int)
  24. lvar[idx] = val
  25. return val
  26. expr = ass_m.group('expr')
  27. else:
  28. def assign(val):
  29. local_vars[ass_m.group('out')] = val
  30. return val
  31. expr = ass_m.group('expr')
  32. elif stmt.startswith('return '):
  33. assign = lambda v: v
  34. expr = stmt[len('return '):]
  35. else:
  36. raise ExtractorError(
  37. 'Cannot determine left side of statement in %r' % stmt)
  38. v = self.interpret_expression(expr, local_vars, allow_recursion)
  39. return assign(v)
  40. def interpret_expression(self, expr, local_vars, allow_recursion):
  41. if expr.isdigit():
  42. return int(expr)
  43. if expr.isalpha():
  44. return local_vars[expr]
  45. m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
  46. if m:
  47. member = m.group('member')
  48. val = local_vars[m.group('in')]
  49. if member == 'split("")':
  50. return list(val)
  51. if member == 'join("")':
  52. return u''.join(val)
  53. if member == 'length':
  54. return len(val)
  55. if member == 'reverse()':
  56. return val[::-1]
  57. slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
  58. if slice_m:
  59. idx = self.interpret_expression(
  60. slice_m.group('idx'), local_vars, allow_recursion - 1)
  61. return val[idx:]
  62. m = re.match(
  63. r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
  64. if m:
  65. val = local_vars[m.group('in')]
  66. idx = self.interpret_expression(
  67. m.group('idx'), local_vars, allow_recursion - 1)
  68. return val[idx]
  69. m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
  70. if m:
  71. a = self.interpret_expression(
  72. m.group('a'), local_vars, allow_recursion)
  73. b = self.interpret_expression(
  74. m.group('b'), local_vars, allow_recursion)
  75. return a % b
  76. m = re.match(
  77. r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
  78. if m:
  79. fname = m.group('func')
  80. if fname not in self._functions:
  81. self._functions[fname] = self.extract_function(fname)
  82. argvals = [int(v) if v.isdigit() else local_vars[v]
  83. for v in m.group('args').split(',')]
  84. return self._functions[fname](argvals)
  85. raise ExtractorError('Unsupported JS expression %r' % expr)
  86. def extract_function(self, funcname):
  87. func_m = re.search(
  88. (r'(?:function %s|%s\s*=\s*function)' % (
  89. re.escape(funcname), re.escape(funcname))) +
  90. r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
  91. self.code)
  92. if func_m is None:
  93. raise ExtractorError('Could not find JS function %r' % funcname)
  94. argnames = func_m.group('args').split(',')
  95. def resf(args):
  96. local_vars = dict(zip(argnames, args))
  97. for stmt in func_m.group('code').split(';'):
  98. res = self.interpret_statement(stmt, local_vars)
  99. return res
  100. return resf