You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

542 lines
18 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. from __future__ import unicode_literals
  2. import binascii
  3. import collections
  4. import email
  5. import getpass
  6. import io
  7. import optparse
  8. import os
  9. import re
  10. import shlex
  11. import shutil
  12. import socket
  13. import subprocess
  14. import sys
  15. import itertools
  16. try:
  17. import urllib.request as compat_urllib_request
  18. except ImportError: # Python 2
  19. import urllib2 as compat_urllib_request
  20. try:
  21. import urllib.error as compat_urllib_error
  22. except ImportError: # Python 2
  23. import urllib2 as compat_urllib_error
  24. try:
  25. import urllib.parse as compat_urllib_parse
  26. except ImportError: # Python 2
  27. import urllib as compat_urllib_parse
  28. try:
  29. from urllib.parse import urlparse as compat_urllib_parse_urlparse
  30. except ImportError: # Python 2
  31. from urlparse import urlparse as compat_urllib_parse_urlparse
  32. try:
  33. import urllib.parse as compat_urlparse
  34. except ImportError: # Python 2
  35. import urlparse as compat_urlparse
  36. try:
  37. import urllib.response as compat_urllib_response
  38. except ImportError: # Python 2
  39. import urllib as compat_urllib_response
  40. try:
  41. import http.cookiejar as compat_cookiejar
  42. except ImportError: # Python 2
  43. import cookielib as compat_cookiejar
  44. try:
  45. import http.cookies as compat_cookies
  46. except ImportError: # Python 2
  47. import Cookie as compat_cookies
  48. try:
  49. import html.entities as compat_html_entities
  50. except ImportError: # Python 2
  51. import htmlentitydefs as compat_html_entities
  52. try:
  53. import http.client as compat_http_client
  54. except ImportError: # Python 2
  55. import httplib as compat_http_client
  56. try:
  57. from urllib.error import HTTPError as compat_HTTPError
  58. except ImportError: # Python 2
  59. from urllib2 import HTTPError as compat_HTTPError
  60. try:
  61. from urllib.request import urlretrieve as compat_urlretrieve
  62. except ImportError: # Python 2
  63. from urllib import urlretrieve as compat_urlretrieve
  64. try:
  65. from subprocess import DEVNULL
  66. compat_subprocess_get_DEVNULL = lambda: DEVNULL
  67. except ImportError:
  68. compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
  69. try:
  70. import http.server as compat_http_server
  71. except ImportError:
  72. import BaseHTTPServer as compat_http_server
  73. try:
  74. compat_str = unicode # Python 2
  75. except NameError:
  76. compat_str = str
  77. try:
  78. from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
  79. from urllib.parse import unquote as compat_urllib_parse_unquote
  80. from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
  81. except ImportError: # Python 2
  82. _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
  83. else re.compile('([\x00-\x7f]+)'))
  84. # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
  85. # implementations from cpython 3.4.3's stdlib. Python 2's version
  86. # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)
  87. def compat_urllib_parse_unquote_to_bytes(string):
  88. """unquote_to_bytes('abc%20def') -> b'abc def'."""
  89. # Note: strings are encoded as UTF-8. This is only an issue if it contains
  90. # unescaped non-ASCII characters, which URIs should not.
  91. if not string:
  92. # Is it a string-like object?
  93. string.split
  94. return b''
  95. if isinstance(string, compat_str):
  96. string = string.encode('utf-8')
  97. bits = string.split(b'%')
  98. if len(bits) == 1:
  99. return string
  100. res = [bits[0]]
  101. append = res.append
  102. for item in bits[1:]:
  103. try:
  104. append(compat_urllib_parse._hextochr[item[:2]])
  105. append(item[2:])
  106. except KeyError:
  107. append(b'%')
  108. append(item)
  109. return b''.join(res)
  110. def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
  111. """Replace %xx escapes by their single-character equivalent. The optional
  112. encoding and errors parameters specify how to decode percent-encoded
  113. sequences into Unicode characters, as accepted by the bytes.decode()
  114. method.
  115. By default, percent-encoded sequences are decoded with UTF-8, and invalid
  116. sequences are replaced by a placeholder character.
  117. unquote('abc%20def') -> 'abc def'.
  118. """
  119. if '%' not in string:
  120. string.split
  121. return string
  122. if encoding is None:
  123. encoding = 'utf-8'
  124. if errors is None:
  125. errors = 'replace'
  126. bits = _asciire.split(string)
  127. res = [bits[0]]
  128. append = res.append
  129. for i in range(1, len(bits), 2):
  130. append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
  131. append(bits[i + 1])
  132. return ''.join(res)
  133. def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
  134. """Like unquote(), but also replace plus signs by spaces, as required for
  135. unquoting HTML form values.
  136. unquote_plus('%7e/abc+def') -> '~/abc def'
  137. """
  138. string = string.replace('+', ' ')
  139. return compat_urllib_parse_unquote(string, encoding, errors)
  140. try:
  141. from urllib.request import DataHandler as compat_urllib_request_DataHandler
  142. except ImportError: # Python < 3.4
  143. # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
  144. class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
  145. def data_open(self, req):
  146. # data URLs as specified in RFC 2397.
  147. #
  148. # ignores POSTed data
  149. #
  150. # syntax:
  151. # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
  152. # mediatype := [ type "/" subtype ] *( ";" parameter )
  153. # data := *urlchar
  154. # parameter := attribute "=" value
  155. url = req.get_full_url()
  156. scheme, data = url.split(":", 1)
  157. mediatype, data = data.split(",", 1)
  158. # even base64 encoded data URLs might be quoted so unquote in any case:
  159. data = compat_urllib_parse_unquote_to_bytes(data)
  160. if mediatype.endswith(";base64"):
  161. data = binascii.a2b_base64(data)
  162. mediatype = mediatype[:-7]
  163. if not mediatype:
  164. mediatype = "text/plain;charset=US-ASCII"
  165. headers = email.message_from_string(
  166. "Content-type: %s\nContent-length: %d\n" % (mediatype, len(data)))
  167. return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
  168. try:
  169. compat_basestring = basestring # Python 2
  170. except NameError:
  171. compat_basestring = str
  172. try:
  173. compat_chr = unichr # Python 2
  174. except NameError:
  175. compat_chr = chr
  176. try:
  177. from xml.etree.ElementTree import ParseError as compat_xml_parse_error
  178. except ImportError: # Python 2.6
  179. from xml.parsers.expat import ExpatError as compat_xml_parse_error
  180. try:
  181. from urllib.parse import parse_qs as compat_parse_qs
  182. except ImportError: # Python 2
  183. # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
  184. # Python 2's version is apparently totally broken
  185. def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
  186. encoding='utf-8', errors='replace'):
  187. qs, _coerce_result = qs, compat_str
  188. pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
  189. r = []
  190. for name_value in pairs:
  191. if not name_value and not strict_parsing:
  192. continue
  193. nv = name_value.split('=', 1)
  194. if len(nv) != 2:
  195. if strict_parsing:
  196. raise ValueError("bad query field: %r" % (name_value,))
  197. # Handle case of a control-name with no equal sign
  198. if keep_blank_values:
  199. nv.append('')
  200. else:
  201. continue
  202. if len(nv[1]) or keep_blank_values:
  203. name = nv[0].replace('+', ' ')
  204. name = compat_urllib_parse_unquote(
  205. name, encoding=encoding, errors=errors)
  206. name = _coerce_result(name)
  207. value = nv[1].replace('+', ' ')
  208. value = compat_urllib_parse_unquote(
  209. value, encoding=encoding, errors=errors)
  210. value = _coerce_result(value)
  211. r.append((name, value))
  212. return r
  213. def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
  214. encoding='utf-8', errors='replace'):
  215. parsed_result = {}
  216. pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
  217. encoding=encoding, errors=errors)
  218. for name, value in pairs:
  219. if name in parsed_result:
  220. parsed_result[name].append(value)
  221. else:
  222. parsed_result[name] = [value]
  223. return parsed_result
  224. try:
  225. from shlex import quote as shlex_quote
  226. except ImportError: # Python < 3.3
  227. def shlex_quote(s):
  228. if re.match(r'^[-_\w./]+$', s):
  229. return s
  230. else:
  231. return "'" + s.replace("'", "'\"'\"'") + "'"
  232. if sys.version_info >= (2, 7, 3):
  233. compat_shlex_split = shlex.split
  234. else:
  235. # Working around shlex issue with unicode strings on some python 2
  236. # versions (see http://bugs.python.org/issue1548891)
  237. def compat_shlex_split(s, comments=False, posix=True):
  238. if isinstance(s, compat_str):
  239. s = s.encode('utf-8')
  240. return shlex.split(s, comments, posix)
  241. def compat_ord(c):
  242. if type(c) is int:
  243. return c
  244. else:
  245. return ord(c)
  246. if sys.version_info >= (3, 0):
  247. compat_getenv = os.getenv
  248. compat_expanduser = os.path.expanduser
  249. else:
  250. # Environment variables should be decoded with filesystem encoding.
  251. # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
  252. def compat_getenv(key, default=None):
  253. from .utils import get_filesystem_encoding
  254. env = os.getenv(key, default)
  255. if env:
  256. env = env.decode(get_filesystem_encoding())
  257. return env
  258. # HACK: The default implementations of os.path.expanduser from cpython do not decode
  259. # environment variables with filesystem encoding. We will work around this by
  260. # providing adjusted implementations.
  261. # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
  262. # for different platforms with correct environment variables decoding.
  263. if os.name == 'posix':
  264. def compat_expanduser(path):
  265. """Expand ~ and ~user constructions. If user or $HOME is unknown,
  266. do nothing."""
  267. if not path.startswith('~'):
  268. return path
  269. i = path.find('/', 1)
  270. if i < 0:
  271. i = len(path)
  272. if i == 1:
  273. if 'HOME' not in os.environ:
  274. import pwd
  275. userhome = pwd.getpwuid(os.getuid()).pw_dir
  276. else:
  277. userhome = compat_getenv('HOME')
  278. else:
  279. import pwd
  280. try:
  281. pwent = pwd.getpwnam(path[1:i])
  282. except KeyError:
  283. return path
  284. userhome = pwent.pw_dir
  285. userhome = userhome.rstrip('/')
  286. return (userhome + path[i:]) or '/'
  287. elif os.name == 'nt' or os.name == 'ce':
  288. def compat_expanduser(path):
  289. """Expand ~ and ~user constructs.
  290. If user or $HOME is unknown, do nothing."""
  291. if path[:1] != '~':
  292. return path
  293. i, n = 1, len(path)
  294. while i < n and path[i] not in '/\\':
  295. i = i + 1
  296. if 'HOME' in os.environ:
  297. userhome = compat_getenv('HOME')
  298. elif 'USERPROFILE' in os.environ:
  299. userhome = compat_getenv('USERPROFILE')
  300. elif 'HOMEPATH' not in os.environ:
  301. return path
  302. else:
  303. try:
  304. drive = compat_getenv('HOMEDRIVE')
  305. except KeyError:
  306. drive = ''
  307. userhome = os.path.join(drive, compat_getenv('HOMEPATH'))
  308. if i != 1: # ~user
  309. userhome = os.path.join(os.path.dirname(userhome), path[1:i])
  310. return userhome + path[i:]
  311. else:
  312. compat_expanduser = os.path.expanduser
  313. if sys.version_info < (3, 0):
  314. def compat_print(s):
  315. from .utils import preferredencoding
  316. print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
  317. else:
  318. def compat_print(s):
  319. assert isinstance(s, compat_str)
  320. print(s)
  321. try:
  322. subprocess_check_output = subprocess.check_output
  323. except AttributeError:
  324. def subprocess_check_output(*args, **kwargs):
  325. assert 'input' not in kwargs
  326. p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
  327. output, _ = p.communicate()
  328. ret = p.poll()
  329. if ret:
  330. raise subprocess.CalledProcessError(ret, p.args, output=output)
  331. return output
  332. if sys.version_info < (3, 0) and sys.platform == 'win32':
  333. def compat_getpass(prompt, *args, **kwargs):
  334. if isinstance(prompt, compat_str):
  335. from .utils import preferredencoding
  336. prompt = prompt.encode(preferredencoding())
  337. return getpass.getpass(prompt, *args, **kwargs)
  338. else:
  339. compat_getpass = getpass.getpass
  340. # Old 2.6 and 2.7 releases require kwargs to be bytes
  341. try:
  342. def _testfunc(x):
  343. pass
  344. _testfunc(**{'x': 0})
  345. except TypeError:
  346. def compat_kwargs(kwargs):
  347. return dict((bytes(k), v) for k, v in kwargs.items())
  348. else:
  349. compat_kwargs = lambda kwargs: kwargs
  350. if sys.version_info < (2, 7):
  351. def compat_socket_create_connection(address, timeout, source_address=None):
  352. host, port = address
  353. err = None
  354. for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
  355. af, socktype, proto, canonname, sa = res
  356. sock = None
  357. try:
  358. sock = socket.socket(af, socktype, proto)
  359. sock.settimeout(timeout)
  360. if source_address:
  361. sock.bind(source_address)
  362. sock.connect(sa)
  363. return sock
  364. except socket.error as _:
  365. err = _
  366. if sock is not None:
  367. sock.close()
  368. if err is not None:
  369. raise err
  370. else:
  371. raise socket.error("getaddrinfo returns an empty list")
  372. else:
  373. compat_socket_create_connection = socket.create_connection
  374. # Fix https://github.com/rg3/youtube-dl/issues/4223
  375. # See http://bugs.python.org/issue9161 for what is broken
  376. def workaround_optparse_bug9161():
  377. op = optparse.OptionParser()
  378. og = optparse.OptionGroup(op, 'foo')
  379. try:
  380. og.add_option('-t')
  381. except TypeError:
  382. real_add_option = optparse.OptionGroup.add_option
  383. def _compat_add_option(self, *args, **kwargs):
  384. enc = lambda v: (
  385. v.encode('ascii', 'replace') if isinstance(v, compat_str)
  386. else v)
  387. bargs = [enc(a) for a in args]
  388. bkwargs = dict(
  389. (k, enc(v)) for k, v in kwargs.items())
  390. return real_add_option(self, *bargs, **bkwargs)
  391. optparse.OptionGroup.add_option = _compat_add_option
  392. if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3
  393. compat_get_terminal_size = shutil.get_terminal_size
  394. else:
  395. _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
  396. def compat_get_terminal_size(fallback=(80, 24)):
  397. columns = compat_getenv('COLUMNS')
  398. if columns:
  399. columns = int(columns)
  400. else:
  401. columns = None
  402. lines = compat_getenv('LINES')
  403. if lines:
  404. lines = int(lines)
  405. else:
  406. lines = None
  407. if columns is None or lines is None or columns <= 0 or lines <= 0:
  408. try:
  409. sp = subprocess.Popen(
  410. ['stty', 'size'],
  411. stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  412. out, err = sp.communicate()
  413. _lines, _columns = map(int, out.split())
  414. except Exception:
  415. _columns, _lines = _terminal_size(*fallback)
  416. if columns is None or columns <= 0:
  417. columns = _columns
  418. if lines is None or lines <= 0:
  419. lines = _lines
  420. return _terminal_size(columns, lines)
  421. try:
  422. itertools.count(start=0, step=1)
  423. compat_itertools_count = itertools.count
  424. except TypeError: # Python 2.6
  425. def compat_itertools_count(start=0, step=1):
  426. n = start
  427. while True:
  428. yield n
  429. n += step
  430. if sys.version_info >= (3, 0):
  431. from tokenize import tokenize as compat_tokenize_tokenize
  432. else:
  433. from tokenize import generate_tokens as compat_tokenize_tokenize
  434. __all__ = [
  435. 'compat_HTTPError',
  436. 'compat_basestring',
  437. 'compat_chr',
  438. 'compat_cookiejar',
  439. 'compat_cookies',
  440. 'compat_expanduser',
  441. 'compat_get_terminal_size',
  442. 'compat_getenv',
  443. 'compat_getpass',
  444. 'compat_html_entities',
  445. 'compat_http_client',
  446. 'compat_http_server',
  447. 'compat_itertools_count',
  448. 'compat_kwargs',
  449. 'compat_ord',
  450. 'compat_parse_qs',
  451. 'compat_print',
  452. 'compat_shlex_split',
  453. 'compat_socket_create_connection',
  454. 'compat_str',
  455. 'compat_subprocess_get_DEVNULL',
  456. 'compat_tokenize_tokenize',
  457. 'compat_urllib_error',
  458. 'compat_urllib_parse',
  459. 'compat_urllib_parse_unquote',
  460. 'compat_urllib_parse_unquote_plus',
  461. 'compat_urllib_parse_unquote_to_bytes',
  462. 'compat_urllib_parse_urlparse',
  463. 'compat_urllib_request',
  464. 'compat_urllib_request_DataHandler',
  465. 'compat_urllib_response',
  466. 'compat_urlparse',
  467. 'compat_urlretrieve',
  468. 'compat_xml_parse_error',
  469. 'shlex_quote',
  470. 'subprocess_check_output',
  471. 'workaround_optparse_bug9161',
  472. ]