You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

498 lines
16 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
9 years ago
10 years ago
10 years ago
10 years ago
  1. from __future__ import unicode_literals
  2. import collections
  3. import getpass
  4. import optparse
  5. import os
  6. import re
  7. import shlex
  8. import shutil
  9. import socket
  10. import subprocess
  11. import sys
  12. import itertools
  13. try:
  14. import urllib.request as compat_urllib_request
  15. except ImportError: # Python 2
  16. import urllib2 as compat_urllib_request
  17. try:
  18. import urllib.error as compat_urllib_error
  19. except ImportError: # Python 2
  20. import urllib2 as compat_urllib_error
  21. try:
  22. import urllib.parse as compat_urllib_parse
  23. except ImportError: # Python 2
  24. import urllib as compat_urllib_parse
  25. try:
  26. from urllib.parse import urlparse as compat_urllib_parse_urlparse
  27. except ImportError: # Python 2
  28. from urlparse import urlparse as compat_urllib_parse_urlparse
  29. try:
  30. import urllib.parse as compat_urlparse
  31. except ImportError: # Python 2
  32. import urlparse as compat_urlparse
  33. try:
  34. import http.cookiejar as compat_cookiejar
  35. except ImportError: # Python 2
  36. import cookielib as compat_cookiejar
  37. try:
  38. import http.cookies as compat_cookies
  39. except ImportError: # Python 2
  40. import Cookie as compat_cookies
  41. try:
  42. import html.entities as compat_html_entities
  43. except ImportError: # Python 2
  44. import htmlentitydefs as compat_html_entities
  45. try:
  46. import http.client as compat_http_client
  47. except ImportError: # Python 2
  48. import httplib as compat_http_client
  49. try:
  50. from urllib.error import HTTPError as compat_HTTPError
  51. except ImportError: # Python 2
  52. from urllib2 import HTTPError as compat_HTTPError
  53. try:
  54. from urllib.request import urlretrieve as compat_urlretrieve
  55. except ImportError: # Python 2
  56. from urllib import urlretrieve as compat_urlretrieve
  try:
      from subprocess import DEVNULL
  except ImportError:
      # No subprocess.DEVNULL available: hand out a freshly opened,
      # writable handle on the null device instead.
      def compat_subprocess_get_DEVNULL():
          return open(os.path.devnull, 'w')
  else:
      # subprocess.DEVNULL exists: simply return that constant.
      def compat_subprocess_get_DEVNULL():
          return DEVNULL
  62. try:
  63. import http.server as compat_http_server
  64. except ImportError:
  65. import BaseHTTPServer as compat_http_server
  # compat_str is the text type: ``unicode`` when that builtin exists
  # (Python 2), plain ``str`` otherwise.
  try:
      unicode
  except NameError:
      compat_str = str
  else:
      compat_str = unicode
  70. try:
  71. from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
  72. from urllib.parse import unquote as compat_urllib_parse_unquote
  73. from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
  74. except ImportError: # Python 2
  75. _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
  76. else re.compile('([\x00-\x7f]+)'))
  77. # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
  78. # implementations from cpython 3.4.3's stdlib. Python 2's version
  79. # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)
  def compat_urllib_parse_unquote_to_bytes(string):
      """unquote_to_bytes('abc%20def') -> b'abc def'.

      Text input is encoded as UTF-8 first; this only matters when it holds
      unescaped non-ASCII characters, which URIs should not contain.
      Percent signs that are not followed by a known two-character hex
      escape are copied through unchanged.
      """
      if not string:
          # Attribute access only: fails for objects that are not string-like.
          string.split
          return b''
      if isinstance(string, compat_str):
          string = string.encode('utf-8')
      chunks = string.split(b'%')
      if len(chunks) == 1:
          # No '%' at all, nothing to decode.
          return string
      decoded = chunks[:1]
      hex_table = compat_urllib_parse._hextochr
      for chunk in chunks[1:]:
          try:
              char = hex_table[chunk[:2]]
          except KeyError:
              # Not a valid escape: keep the literal '%' and the chunk as-is.
              decoded.extend((b'%', chunk))
          else:
              decoded.extend((char, chunk[2:]))
      return b''.join(decoded)
  def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
      """Replace %xx escapes by their single-character equivalent.

      ``encoding`` and ``errors`` are passed to ``bytes.decode()`` when the
      percent-encoded sequences are turned back into text. ``None`` for
      either one selects the default ('utf-8' / 'replace'), so by default
      invalid sequences become a placeholder character.

      unquote('abc%20def') -> 'abc def'.
      """
      if '%' not in string:
          # Attribute access only: fails for objects that are not string-like.
          string.split
          return string
      encoding = 'utf-8' if encoding is None else encoding
      errors = 'replace' if errors is None else errors
      # _asciire has a single capturing group, so the split result alternates
      # between text outside the matches (even indexes) and the matched
      # runs (odd indexes), which are the parts that get unquoted.
      pieces = _asciire.split(string)
      out = pieces[:1]
      for escaped, literal in zip(pieces[1::2], pieces[2::2]):
          out.append(
              compat_urllib_parse_unquote_to_bytes(escaped).decode(encoding, errors))
          out.append(literal)
      return ''.join(out)
  def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
      """Like unquote(), but plus signs are turned into spaces first, as
      required for unquoting HTML form values.

      unquote_plus('%7e/abc+def') -> '~/abc def'
      """
      return compat_urllib_parse_unquote(string.replace('+', ' '), encoding, errors)
  # compat_basestring is ``basestring`` when that builtin exists (Python 2)
  # and ``str`` otherwise.
  try:
      basestring
  except NameError:
      compat_basestring = str
  else:
      compat_basestring = basestring
  # compat_chr is ``unichr`` when that builtin exists (Python 2) and
  # ``chr`` otherwise.
  try:
      unichr
  except NameError:
      compat_chr = chr
  else:
      compat_chr = unichr
  141. try:
  142. from xml.etree.ElementTree import ParseError as compat_xml_parse_error
  143. except ImportError: # Python 2.6
  144. from xml.parsers.expat import ExpatError as compat_xml_parse_error
  145. try:
  146. from urllib.parse import parse_qs as compat_parse_qs
  147. except ImportError: # Python 2
  148. # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
  149. # Python 2's version is apparently totally broken
  def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                 encoding='utf-8', errors='replace'):
      """Split the query string *qs* into a list of (name, value) pairs.

      Fields are separated by '&' or ';'. Names and values have '+' replaced
      by a space and are percent-decoded with *encoding* / *errors*.

      keep_blank_values: keep fields whose value is empty (including fields
          that have no '=' at all); otherwise such fields are dropped.
      strict_parsing: raise ValueError for a field without '=' (an empty
          field counts as one) instead of skipping it.
      """
      coerce = compat_str

      def _decode(component):
          unquoted = compat_urllib_parse_unquote(
              component.replace('+', ' '), encoding=encoding, errors=errors)
          return coerce(unquoted)

      fields = [field for chunk in qs.split('&') for field in chunk.split(';')]
      result = []
      for field in fields:
          if not (field or strict_parsing):
              continue
          parts = field.split('=', 1)
          if len(parts) != 2:
              if strict_parsing:
                  raise ValueError("bad query field: %r" % (field,))
              # A control name without an equal sign: only kept (with an
              # empty value) when blank values were asked for.
              if not keep_blank_values:
                  continue
              parts.append('')
          raw_name, raw_value = parts
          if len(raw_value) or keep_blank_values:
              result.append((_decode(raw_name), _decode(raw_value)))
      return result
  def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                      encoding='utf-8', errors='replace'):
      """Parse the query string *qs* into a dict mapping each name to the
      list of its values, in order of appearance.

      All arguments are forwarded to _parse_qsl, which does the splitting
      and decoding.
      """
      grouped = {}
      decoded_pairs = _parse_qsl(
          qs, keep_blank_values, strict_parsing,
          encoding=encoding, errors=errors)
      for key, val in decoded_pairs:
          grouped.setdefault(key, []).append(val)
      return grouped
  189. try:
  190. from shlex import quote as shlex_quote
  191. except ImportError: # Python < 3.3
  def shlex_quote(s):
      """Return *s* quoted so that a POSIX shell reads it as a single word.

      Strings made up only of word characters, '-', '_', '.' and '/' are
      returned unchanged. Everything else (including the empty string) is
      wrapped in single quotes, with each embedded single quote written
      as '"'"'.
      """
      # \Z anchors at the very end of the string. '$' would also match just
      # before a trailing newline and let e.g. 'abc\n' through unquoted.
      if re.match(r'^[-_\w./]+\Z', s):
          return s
      return "'" + s.replace("'", "'\"'\"'") + "'"
  if sys.version_info < (2, 7, 3):
      # Working around shlex issue with unicode strings on some python 2
      # versions (see http://bugs.python.org/issue1548891): text input is
      # encoded to UTF-8 before being handed to shlex.split.
      def compat_shlex_split(s, comments=False, posix=True):
          encoded = s.encode('utf-8') if isinstance(s, compat_str) else s
          return shlex.split(encoded, comments, posix)
  else:
      compat_shlex_split = shlex.split
  def compat_ord(c):
      """Return *c* unchanged when it is exactly an int, else ``ord(c)``."""
      return c if type(c) is int else ord(c)
  211. if sys.version_info >= (3, 0):
  212. compat_getenv = os.getenv
  213. compat_expanduser = os.path.expanduser
  214. else:
  215. # Environment variables should be decoded with filesystem encoding.
  216. # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
  def compat_getenv(key, default=None):
      """Return environment variable *key* (or *default*), decoded with the
      filesystem encoding.

      Falsy results (unset variable with a falsy default, or an empty
      value) are returned as they are, without decoding.
      """
      from .utils import get_filesystem_encoding
      value = os.getenv(key, default)
      if not value:
          return value
      return value.decode(get_filesystem_encoding())
  223. # HACK: The default implementations of os.path.expanduser from cpython do not decode
  224. # environment variables with filesystem encoding. We will work around this by
  225. # providing adjusted implementations.
  226. # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
  227. # for different platforms with correct environment variables decoding.
  228. if os.name == 'posix':
  def compat_expanduser(path):
      """Expand a leading ~ or ~user in *path*.

      The path is returned unchanged when it does not start with '~' or
      when the named user is unknown to the password database.
      """
      if not path.startswith('~'):
          return path
      slash = path.find('/', 1)
      end = len(path) if slash < 0 else slash
      user, rest = path[1:end], path[end:]
      if user:
          # "~user": look the home directory up in the password database.
          import pwd
          try:
              entry = pwd.getpwnam(user)
          except KeyError:
              return path
          home = entry.pw_dir
      elif 'HOME' in os.environ:
          # Plain "~": $HOME wins when it is set.
          home = compat_getenv('HOME')
      else:
          import pwd
          home = pwd.getpwuid(os.getuid()).pw_dir
      # A home of '/' strips down to '', so fall back to '/' when the
      # combined result is empty.
      return (home.rstrip('/') + rest) or '/'
  252. elif os.name == 'nt' or os.name == 'ce':
  def compat_expanduser(path):
      """Expand ~ and ~user constructs.

      The home directory is taken from HOME, then USERPROFILE, then
      HOMEDRIVE + HOMEPATH. If none of HOME, USERPROFILE or HOMEPATH is
      set, or *path* does not start with '~', *path* is returned unchanged.
      For ~user, the user name replaces the last component of the current
      home directory.
      """
      if path[:1] != '~':
          return path
      # Find the end of the "~" / "~user" prefix (first '/' or '\\').
      i, n = 1, len(path)
      while i < n and path[i] not in '/\\':
          i += 1

      if 'HOME' in os.environ:
          userhome = compat_getenv('HOME')
      elif 'USERPROFILE' in os.environ:
          userhome = compat_getenv('USERPROFILE')
      elif 'HOMEPATH' not in os.environ:
          return path
      else:
          # compat_getenv returns its default instead of raising KeyError
          # for a missing variable, so ask for '' directly; a None drive
          # would make os.path.join fail.
          drive = compat_getenv('HOMEDRIVE', '')
          userhome = os.path.join(drive, compat_getenv('HOMEPATH'))

      if i != 1:  # ~user
          userhome = os.path.join(os.path.dirname(userhome), path[1:i])

      return userhome + path[i:]
  276. else:
  277. compat_expanduser = os.path.expanduser
  if sys.version_info >= (3, 0):
      def compat_print(s):
          """Print the text string *s*."""
          assert isinstance(s, compat_str)
          print(s)
  else:
      def compat_print(s):
          """Print *s* encoded with the preferred encoding; characters that
          cannot be encoded are written as XML character references."""
          from .utils import preferredencoding
          encoded = s.encode(preferredencoding(), 'xmlcharrefreplace')
          print(encoded)
  286. try:
  287. subprocess_check_output = subprocess.check_output
  288. except AttributeError:
  def subprocess_check_output(*args, **kwargs):
      """Run a command and return what it wrote to stdout.

      Fallback for interpreters whose subprocess module has no
      check_output. All arguments are forwarded to subprocess.Popen;
      the 'input' keyword is not supported.

      Raises subprocess.CalledProcessError (with the captured output in its
      ``output`` attribute) when the command exits with a non-zero status.
      """
      assert 'input' not in kwargs
      p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
      output, _ = p.communicate()
      ret = p.poll()
      if ret:
          # Popen objects only have an ``args`` attribute on Python >= 3.3 and
          # CalledProcessError only accepts ``output`` on >= 2.7, so recover
          # the command from our own arguments and set the attribute by hand.
          cmd = kwargs.get('args')
          if cmd is None:
              cmd = args[0]
          error = subprocess.CalledProcessError(ret, cmd)
          error.output = output
          raise error
      return output
  if sys.version_info >= (3, 0) or sys.platform != 'win32':
      compat_getpass = getpass.getpass
  else:
      # Python 2 on win32: text prompts are encoded with the preferred
      # encoding before being passed on to getpass.getpass.
      def compat_getpass(prompt, *args, **kwargs):
          if isinstance(prompt, compat_str):
              from .utils import preferredencoding
              prompt = prompt.encode(preferredencoding())
          return getpass.getpass(prompt, *args, **kwargs)
  305. # Old 2.6 and 2.7 releases require kwargs to be bytes
  def _testfunc(x):
      pass


  # Probe whether ** expansion accepts the keys of a plain dict literal;
  # a TypeError here means keyword names have to be converted to bytes.
  try:
      _testfunc(**{'x': 0})
  except TypeError:
      def compat_kwargs(kwargs):
          converted = {}
          for key, value in kwargs.items():
              converted[bytes(key)] = value
          return converted
  else:
      def compat_kwargs(kwargs):
          return kwargs
  if sys.version_info >= (2, 7):
      compat_socket_create_connection = socket.create_connection
  else:
      def compat_socket_create_connection(address, timeout, source_address=None):
          """Connect to *address* (a ``(host, port)`` pair) and return the socket.

          Every result of getaddrinfo is tried in turn. The first socket that
          connects is returned; if none does, the last socket.error is
          re-raised. *source_address*, when given, is bound before connecting.
          """
          host, port = address
          last_error = None
          for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
              family, kind, proto, _canonname, sockaddr = res
              sock = None
              try:
                  sock = socket.socket(family, kind, proto)
                  sock.settimeout(timeout)
                  if source_address:
                      sock.bind(source_address)
                  sock.connect(sockaddr)
                  return sock
              except socket.error as exc:
                  last_error = exc
                  if sock is not None:
                      sock.close()
          if last_error is None:
              raise socket.error("getaddrinfo returns an empty list")
          raise last_error
  339. # Fix https://github.com/rg3/youtube-dl/issues/4223
  340. # See http://bugs.python.org/issue9161 for what is broken
  def workaround_optparse_bug9161():
      """Patch optparse.OptionGroup.add_option if it rejects text arguments.

      A throw-away option group is probed with ``add_option('-t')``. Only
      when that raises TypeError is OptionGroup.add_option replaced by a
      wrapper that ASCII-encodes (with 'replace') every text positional
      and keyword argument before delegating to the original method.
      """
      probe_group = optparse.OptionGroup(optparse.OptionParser(), 'foo')
      try:
          probe_group.add_option('-t')
      except TypeError:
          pass
      else:
          # add_option copes with our strings, nothing to patch.
          return

      original_add_option = optparse.OptionGroup.add_option

      def _to_bytes(value):
          if isinstance(value, compat_str):
              return value.encode('ascii', 'replace')
          return value

      def _compat_add_option(self, *args, **kwargs):
          byte_args = [_to_bytes(arg) for arg in args]
          byte_kwargs = dict(
              (key, _to_bytes(value)) for key, value in kwargs.items())
          return original_add_option(self, *byte_args, **byte_kwargs)

      optparse.OptionGroup.add_option = _compat_add_option
  357. if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3
  358. compat_get_terminal_size = shutil.get_terminal_size
  359. else:
  360. _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
  def compat_get_terminal_size(fallback=(80, 24)):
      """Return the terminal size as a ``(columns, lines)`` named tuple.

      The COLUMNS and LINES environment variables take precedence. For any
      dimension that is unset, non-numeric or not positive, the output of
      ``stty size`` is used; if that cannot be run or parsed, the
      corresponding value of *fallback* (``(columns, lines)``) is used.
      """
      def _env_dimension(name):
          # Unset, empty or non-numeric values count as 0, i.e. "unknown".
          try:
              return int(compat_getenv(name, None) or 0)
          except ValueError:
              return 0

      columns = _env_dimension('COLUMNS')
      lines = _env_dimension('LINES')

      if columns <= 0 or lines <= 0:
          try:
              sp = subprocess.Popen(
                  ['stty', 'size'],
                  stdout=subprocess.PIPE, stderr=subprocess.PIPE)
              out, err = sp.communicate()
              # `stty size` prints "<rows> <columns>", in that order.
              _lines, _columns = map(int, out.split())
          except Exception:
              # Best effort: stty missing, no terminal, unparsable output...
              _columns, _lines = _terminal_size(*fallback)

          if columns <= 0:
              columns = _columns
          if lines <= 0:
              lines = _lines
      return _terminal_size(columns, lines)
  # Probe whether itertools.count accepts the start/step keywords.
  try:
      itertools.count(start=0, step=1)
  except TypeError:  # Python 2.6
      def compat_itertools_count(start=0, step=1):
          """Yield start, start + step, start + 2 * step, ... forever."""
          current = start
          while True:
              yield current
              current += step
  else:
      compat_itertools_count = itertools.count
  395. if sys.version_info >= (3, 0):
  396. from tokenize import tokenize as compat_tokenize_tokenize
  397. else:
  398. from tokenize import generate_tokens as compat_tokenize_tokenize
  399. __all__ = [
  400. 'compat_HTTPError',
  401. 'compat_basestring',
  402. 'compat_chr',
  403. 'compat_cookiejar',
  404. 'compat_cookies',
  405. 'compat_expanduser',
  406. 'compat_get_terminal_size',
  407. 'compat_getenv',
  408. 'compat_getpass',
  409. 'compat_html_entities',
  410. 'compat_http_client',
  411. 'compat_http_server',
  412. 'compat_itertools_count',
  413. 'compat_kwargs',
  414. 'compat_ord',
  415. 'compat_parse_qs',
  416. 'compat_print',
  417. 'compat_shlex_split',
  418. 'compat_socket_create_connection',
  419. 'compat_str',
  420. 'compat_subprocess_get_DEVNULL',
  421. 'compat_tokenize_tokenize',
  422. 'compat_urllib_error',
  423. 'compat_urllib_parse',
  424. 'compat_urllib_parse_unquote',
  425. 'compat_urllib_parse_unquote_plus',
  426. 'compat_urllib_parse_unquote_to_bytes',
  427. 'compat_urllib_parse_urlparse',
  428. 'compat_urllib_request',
  429. 'compat_urlparse',
  430. 'compat_urlretrieve',
  431. 'compat_xml_parse_error',
  432. 'shlex_quote',
  433. 'subprocess_check_output',
  434. 'workaround_optparse_bug9161',
  435. ]