You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

662 lines
22 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
9 years ago
10 years ago
10 years ago
10 years ago
  1. from __future__ import unicode_literals
  2. import binascii
  3. import collections
  4. import email
  5. import getpass
  6. import io
  7. import optparse
  8. import os
  9. import re
  10. import shlex
  11. import shutil
  12. import socket
  13. import struct
  14. import subprocess
  15. import sys
  16. import itertools
  17. import xml.etree.ElementTree
  18. try:
  19. import urllib.request as compat_urllib_request
  20. except ImportError: # Python 2
  21. import urllib2 as compat_urllib_request
  22. try:
  23. import urllib.error as compat_urllib_error
  24. except ImportError: # Python 2
  25. import urllib2 as compat_urllib_error
  26. try:
  27. import urllib.parse as compat_urllib_parse
  28. except ImportError: # Python 2
  29. import urllib as compat_urllib_parse
  30. try:
  31. from urllib.parse import urlparse as compat_urllib_parse_urlparse
  32. except ImportError: # Python 2
  33. from urlparse import urlparse as compat_urllib_parse_urlparse
  34. try:
  35. import urllib.parse as compat_urlparse
  36. except ImportError: # Python 2
  37. import urlparse as compat_urlparse
  38. try:
  39. import urllib.response as compat_urllib_response
  40. except ImportError: # Python 2
  41. import urllib as compat_urllib_response
  42. try:
  43. import http.cookiejar as compat_cookiejar
  44. except ImportError: # Python 2
  45. import cookielib as compat_cookiejar
  46. try:
  47. import http.cookies as compat_cookies
  48. except ImportError: # Python 2
  49. import Cookie as compat_cookies
  50. try:
  51. import html.entities as compat_html_entities
  52. except ImportError: # Python 2
  53. import htmlentitydefs as compat_html_entities
  54. try:
  55. import http.client as compat_http_client
  56. except ImportError: # Python 2
  57. import httplib as compat_http_client
  58. try:
  59. from urllib.error import HTTPError as compat_HTTPError
  60. except ImportError: # Python 2
  61. from urllib2 import HTTPError as compat_HTTPError
  62. try:
  63. from urllib.request import urlretrieve as compat_urlretrieve
  64. except ImportError: # Python 2
  65. from urllib import urlretrieve as compat_urlretrieve
  66. try:
  67. from html.parser import HTMLParser as compat_HTMLParser
  68. except ImportError: # Python 2
  69. from HTMLParser import HTMLParser as compat_HTMLParser
  70. try:
  71. from subprocess import DEVNULL
  72. compat_subprocess_get_DEVNULL = lambda: DEVNULL
  73. except ImportError:
  74. compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
  75. try:
  76. import http.server as compat_http_server
  77. except ImportError:
  78. import BaseHTTPServer as compat_http_server
  79. try:
  80. compat_str = unicode # Python 2
  81. except NameError:
  82. compat_str = str
  83. try:
  84. from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
  85. from urllib.parse import unquote as compat_urllib_parse_unquote
  86. from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
  87. except ImportError: # Python 2
  88. _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
  89. else re.compile('([\x00-\x7f]+)'))
  90. # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
  91. # implementations from cpython 3.4.3's stdlib. Python 2's version
  92. # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)
  def compat_urllib_parse_unquote_to_bytes(string):
      """unquote_to_bytes('abc%20def') -> b'abc def'."""
      # Text input is encoded as UTF-8 first. That only matters when it holds
      # unescaped non-ASCII characters, which a well-formed URI should not.
      if not string:
          # Bare attribute access: raises AttributeError for non-string input.
          string.split
          return b''
      if isinstance(string, compat_str):
          string = string.encode('utf-8')
      chunks = string.split(b'%')
      if len(chunks) == 1:
          # No percent sign at all, nothing to decode.
          return string
      decoded = [chunks[0]]
      hex_to_chr = compat_urllib_parse._hextochr
      for chunk in chunks[1:]:
          try:
              char = hex_to_chr[chunk[:2]]
          except KeyError:
              # Not a valid %xx escape: keep the percent sign literally.
              decoded += [b'%', chunk]
          else:
              decoded += [char, chunk[2:]]
      return b''.join(decoded)
  def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
      """Replace %xx escapes by their single-character equivalent.

      ``encoding`` and ``errors`` are handed to bytes.decode() when the
      percent-encoded sequences are turned into text. Passing None for
      either selects the default: UTF-8 decoding, with invalid sequences
      replaced by a placeholder character.

      unquote('abc%20def') -> 'abc def'.
      """
      if '%' not in string:
          # Bare attribute access: raises AttributeError for non-string input.
          string.split
          return string
      encoding = 'utf-8' if encoding is None else encoding
      errors = 'replace' if errors is None else errors
      # _asciire has one capture group, so odd indices hold the ASCII runs
      # (which may contain escapes) and even indices hold the text in between.
      pieces = _asciire.split(string)
      out = [pieces[0]]
      for idx in range(1, len(pieces), 2):
          raw = compat_urllib_parse_unquote_to_bytes(pieces[idx])
          out.append(raw.decode(encoding, errors))
          out.append(pieces[idx + 1])
      return ''.join(out)
  def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
      """Like unquote(), but plus signs are turned into spaces first, as
      needed when unquoting HTML form values.

      unquote_plus('%7e/abc+def') -> '~/abc def'
      """
      return compat_urllib_parse_unquote(
          string.replace('+', ' '), encoding, errors)
  146. try:
  147. from urllib.parse import urlencode as compat_urllib_parse_urlencode
  148. except ImportError: # Python 2
  149. # Python 2 will choke in urlencode on mixture of byte and unicode strings.
  150. # Possible solutions are to either port it from python 3 with all
  151. # the friends or manually ensure input query contains only byte strings.
  152. # We will stick with latter thus recursively encoding the whole query.
  def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'):
      """urlencode() wrapper that first converts every text string found in
      ``query`` (recursively, through dicts, lists and tuples) to bytes using
      ``encoding``, then delegates to the stdlib urlencode."""
      def to_bytes(obj):
          if isinstance(obj, dict):
              return dict((to_bytes(k), to_bytes(v)) for k, v in obj.items())
          if isinstance(obj, (list, tuple)):
              converted = [to_bytes(item) for item in obj]
              # Preserve the container type of the input sequence.
              return tuple(converted) if isinstance(obj, tuple) else converted
          if isinstance(obj, compat_str):
              return obj.encode(encoding)
          return obj

      return compat_urllib_parse.urlencode(to_bytes(query), doseq=doseq)
  168. try:
  169. from urllib.request import DataHandler as compat_urllib_request_DataHandler
  170. except ImportError: # Python < 3.4
  171. # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
  class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
      """urllib handler for ``data:`` URLs as specified in RFC 2397."""

      def data_open(self, req):
          """Decode the data URL carried by ``req`` and return it as a
          response object. POSTed data is ignored.

          Syntax handled:
            dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            mediatype := [ type "/" subtype ] *( ";" parameter )
            data      := *urlchar
            parameter := attribute "=" value
          """
          full_url = req.get_full_url()
          _scheme, remainder = full_url.split(':', 1)
          mediatype, payload = remainder.split(',', 1)

          # Even base64 encoded data URLs might be quoted, so unquote in any case.
          payload = compat_urllib_parse_unquote_to_bytes(payload)
          if mediatype.endswith(';base64'):
              payload = binascii.a2b_base64(payload)
              mediatype = mediatype[:-7]  # drop the ';base64' suffix

          mediatype = mediatype or 'text/plain;charset=US-ASCII'

          header_text = 'Content-type: %s\nContent-length: %d\n' % (
              mediatype, len(payload))
          headers = email.message_from_string(header_text)

          return compat_urllib_response.addinfourl(
              io.BytesIO(payload), headers, full_url)
  196. try:
  197. compat_basestring = basestring # Python 2
  198. except NameError:
  199. compat_basestring = str
  200. try:
  201. compat_chr = unichr # Python 2
  202. except NameError:
  203. compat_chr = chr
  204. try:
  205. from xml.etree.ElementTree import ParseError as compat_xml_parse_error
  206. except ImportError: # Python 2.6
  207. from xml.parsers.expat import ExpatError as compat_xml_parse_error
  etree = xml.etree.ElementTree


  class _TreeBuilder(etree.TreeBuilder):
      """TreeBuilder whose doctype() hook does nothing."""

      def doctype(self, name, pubid, system):
          pass


  if sys.version_info[0] < 3:
      # python 2.x tries to encode unicode strings with ascii (see the
      # XMLParser._fixtext method)
      try:
          _etree_iter = etree.Element.iter
      except AttributeError:  # Python <=2.6
          def _etree_iter(root):
              """Yield every descendant of ``root``, depth first."""
              for child in root.findall('*'):
                  yield child
                  for descendant in _etree_iter(child):
                      yield descendant

      # on 2.6 XML doesn't have a parser argument, function copied from CPython
      # 2.7 source
      def _XML(text, parser=None):
          parser = parser or etree.XMLParser(target=_TreeBuilder())
          parser.feed(text)
          return parser.close()

      def _element_factory(*args, **kwargs):
          """Build an Element and decode any bytes attribute values as UTF-8."""
          element = etree.Element(*args, **kwargs)
          for key, value in element.items():
              if isinstance(value, bytes):
                  element.set(key, value.decode('utf-8'))
          return element

      def compat_etree_fromstring(text):
          """Parse ``text`` into an element tree whose bytes attribute values
          and element texts are decoded as UTF-8."""
          builder = _TreeBuilder(element_factory=_element_factory)
          doc = _XML(text, parser=etree.XMLParser(target=builder))
          for element in _etree_iter(doc):
              if isinstance(element.text, bytes):
                  element.text = element.text.decode('utf-8')
          return doc
  else:
      def compat_etree_fromstring(text):
          """Parse ``text`` into an element tree using the _TreeBuilder target."""
          xml_parser = etree.XMLParser(target=_TreeBuilder())
          return etree.XML(text, parser=xml_parser)
  if sys.version_info >= (2, 7):
      def compat_xpath(xpath):
          """Return ``xpath`` unchanged (no workaround needed on 2.7+)."""
          return xpath
  else:
      # Here comes the crazy part: In 2.6, if the xpath is a unicode,
      # .//node does not match if a node is a direct child of . !
      def compat_xpath(xpath):
          """Return ``xpath`` as an ASCII byte string when it is text."""
          if isinstance(xpath, compat_str):
              return xpath.encode('ascii')
          return xpath
  254. try:
  255. from urllib.parse import parse_qs as compat_parse_qs
  256. except ImportError: # Python 2
  257. # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
  258. # Python 2's version is apparently totally broken
  def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                 encoding='utf-8', errors='replace'):
      """Split query string ``qs`` into a list of (name, value) pairs.

      Fields are separated by '&' or ';'. A field without '=' raises
      ValueError when ``strict_parsing`` is set; otherwise it is kept with an
      empty value if ``keep_blank_values`` is set and dropped if not. Fields
      with an empty value are kept only when ``keep_blank_values`` is set.
      Names and values have '+' turned into a space and are percent-decoded
      with ``encoding`` / ``errors``.
      """
      _coerce_result = compat_str

      def decode(component):
          text = compat_urllib_parse_unquote(
              component.replace('+', ' '), encoding=encoding, errors=errors)
          return _coerce_result(text)

      result = []
      for amp_part in qs.split('&'):
          for field in amp_part.split(';'):
              if not field and not strict_parsing:
                  continue
              parts = field.split('=', 1)
              if len(parts) != 2:
                  if strict_parsing:
                      raise ValueError('bad query field: %r' % (field,))
                  # Handle case of a control-name with no equal sign
                  if not keep_blank_values:
                      continue
                  parts.append('')
              if len(parts[1]) or keep_blank_values:
                  result.append((decode(parts[0]), decode(parts[1])))
      return result
  def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                      encoding='utf-8', errors='replace'):
      """Parse query string ``qs`` into a dict mapping each field name to the
      list of its values, in order of appearance."""
      grouped = {}
      fields = _parse_qsl(
          qs, keep_blank_values, strict_parsing,
          encoding=encoding, errors=errors)
      for name, value in fields:
          grouped.setdefault(name, []).append(value)
      return grouped
  298. try:
  299. from shlex import quote as compat_shlex_quote
  300. except ImportError: # Python < 3.3
  def compat_shlex_quote(s):
      """Return ``s`` quoted so that a POSIX shell reads it as a single word.

      Strings made only of word characters, '-', '_', '.' and '/' are
      returned unchanged. Anything else (including the empty string) is
      wrapped in single quotes, with embedded single quotes spelled as
      '"'"'.
      """
      # \Z anchors at the very end of the string. The previous `$` also
      # matched just before a trailing newline, so a value such as 'abc\n'
      # was considered safe and returned unquoted.
      if re.match(r'^[-_\w./]+\Z', s):
          return s
      return "'" + s.replace("'", "'\"'\"'") + "'"
  if sys.version_info < (2, 7, 3):
      # Working around shlex issue with unicode strings on some python 2
      # versions (see http://bugs.python.org/issue1548891)
      def compat_shlex_split(s, comments=False, posix=True):
          """shlex.split() that encodes text input as UTF-8 bytes first."""
          encoded = s.encode('utf-8') if isinstance(s, compat_str) else s
          return shlex.split(encoded, comments, posix)
  else:
      compat_shlex_split = shlex.split
  def compat_ord(c):
      """Return ``c`` itself when it is exactly an int, else ``ord(c)``."""
      # Exact type check: only plain ints are passed through untouched.
      return c if type(c) is int else ord(c)
  320. compat_os_name = os._name if os.name == 'java' else os.name
  321. if sys.version_info >= (3, 0):
  322. compat_getenv = os.getenv
  323. compat_expanduser = os.path.expanduser
  def compat_setenv(key, value, env=os.environ):
      """Store ``value`` under ``key`` in ``env`` (os.environ by default)."""
      env[key] = value
  326. else:
  327. # Environment variables should be decoded with filesystem encoding.
  328. # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
  def compat_getenv(key, default=None):
      """Return environment variable ``key`` decoded with the filesystem
      encoding; falsy results (unset, empty) are returned as they are."""
      from .utils import get_filesystem_encoding
      value = os.getenv(key, default)
      if not value:
          return value
      return value.decode(get_filesystem_encoding())
  def compat_setenv(key, value, env=os.environ):
      """Store ``value`` under ``key`` in ``env``, encoding text strings with
      the filesystem encoding first."""
      from .utils import get_filesystem_encoding

      def as_native(item):
          if isinstance(item, compat_str):
              return item.encode(get_filesystem_encoding())
          return item

      env[as_native(key)] = as_native(value)
  340. # HACK: The default implementations of os.path.expanduser from cpython do not decode
  341. # environment variables with filesystem encoding. We will work around this by
  342. # providing adjusted implementations.
  343. # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
  344. # for different platforms with correct environment variables decoding.
  345. if compat_os_name == 'posix':
  def compat_expanduser(path):
      """Expand ~ and ~user constructions. If user or $HOME is unknown,
      do nothing."""
      if not path.startswith('~'):
          return path
      # End of the "~" / "~user" prefix: first slash, or the whole string.
      end = path.find('/', 1)
      if end < 0:
          end = len(path)
      if end != 1:
          # "~user": look the named user up in the password database.
          import pwd
          try:
              entry = pwd.getpwnam(path[1:end])
          except KeyError:
              return path
          userhome = entry.pw_dir
      elif 'HOME' in os.environ:
          userhome = compat_getenv('HOME')
      else:
          # Plain "~" without $HOME: fall back to the current user's entry.
          import pwd
          userhome = pwd.getpwuid(os.getuid()).pw_dir
      return (userhome.rstrip('/') + path[end:]) or '/'
  369. elif compat_os_name == 'nt' or compat_os_name == 'ce':
  def compat_expanduser(path):
      """Expand ~ and ~user constructs.

      If user or $HOME is unknown, do nothing.

      The home directory is taken from HOME, then USERPROFILE, then
      HOMEDRIVE + HOMEPATH, each read through compat_getenv. For ``~user``
      the last component of that directory is replaced by the user name.
      """
      if path[:1] != '~':
          return path
      # Find the end of the "~" / "~user" prefix.
      i, n = 1, len(path)
      while i < n and path[i] not in '/\\':
          i += 1

      if 'HOME' in os.environ:
          userhome = compat_getenv('HOME')
      elif 'USERPROFILE' in os.environ:
          userhome = compat_getenv('USERPROFILE')
      elif 'HOMEPATH' not in os.environ:
          return path
      else:
          # compat_getenv() returns None for a missing variable instead of
          # raising KeyError, so the empty-drive fallback has to test the
          # value; otherwise os.path.join() would be handed None.
          drive = compat_getenv('HOMEDRIVE') or ''
          userhome = os.path.join(drive, compat_getenv('HOMEPATH'))

      if i != 1:  # ~user
          userhome = os.path.join(os.path.dirname(userhome), path[1:i])

      return userhome + path[i:]
  393. else:
  394. compat_expanduser = os.path.expanduser
  if sys.version_info >= (3, 0):
      def compat_print(s):
          """Print the text string ``s``."""
          assert isinstance(s, compat_str)
          print(s)
  else:
      def compat_print(s):
          """Print ``s`` encoded with the preferred encoding; characters that
          cannot be encoded become XML character references."""
          from .utils import preferredencoding
          encoded = s.encode(preferredencoding(), 'xmlcharrefreplace')
          print(encoded)
  if sys.platform == 'win32' and sys.version_info < (3, 0):
      def compat_getpass(prompt, *args, **kwargs):
          """getpass.getpass() that encodes a text prompt with the preferred
          encoding before handing it over."""
          if isinstance(prompt, compat_str):
              from .utils import preferredencoding
              prompt = prompt.encode(preferredencoding())
          return getpass.getpass(prompt, *args, **kwargs)
  else:
      compat_getpass = getpass.getpass
  411. try:
  412. compat_input = raw_input
  413. except NameError: # Python 3
  414. compat_input = input
  415. # Python < 2.6.5 require kwargs to be bytes
  def _testfunc(x):
      pass


  try:
      # Probe: does this interpreter accept text keys in ** expansion?
      _testfunc(**{'x': 0})
  except TypeError:
      def compat_kwargs(kwargs):
          """Return a copy of ``kwargs`` whose keys are converted with bytes()."""
          return dict((bytes(name), value) for name, value in kwargs.items())
  else:
      def compat_kwargs(kwargs):
          """Return ``kwargs`` unchanged."""
          return kwargs
  if sys.version_info >= (2, 7):
      compat_socket_create_connection = socket.create_connection
  else:
      def compat_socket_create_connection(address, timeout, source_address=None):
          """Connect to ``address`` (a (host, port) pair) and return the socket.

          Each result of getaddrinfo() is tried in turn; the first socket
          that connects is returned. If none connects, the last socket error
          is raised, or a socket.error if getaddrinfo() returned nothing.
          """
          host, port = address
          last_error = None
          candidates = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
          for af, socktype, proto, canonname, sa in candidates:
              sock = None
              try:
                  sock = socket.socket(af, socktype, proto)
                  sock.settimeout(timeout)
                  if source_address:
                      sock.bind(source_address)
                  sock.connect(sa)
                  return sock
              except socket.error as exc:
                  last_error = exc
                  # Do not leak the half-set-up socket before trying the next one.
                  if sock is not None:
                      sock.close()
          if last_error is None:
              raise socket.error('getaddrinfo returns an empty list')
          raise last_error
  449. # Fix https://github.com/rg3/youtube-dl/issues/4223
  450. # See http://bugs.python.org/issue9161 for what is broken
  def workaround_optparse_bug9161():
      """Patch optparse.OptionGroup.add_option when it rejects text strings.

      A probe option is added to a throw-away group. If that raises
      TypeError, add_option is replaced by a wrapper that encodes text
      arguments and keyword values as ASCII (with 'replace') before calling
      the real method. Otherwise nothing is changed.
      """
      probe_parser = optparse.OptionParser()
      probe_group = optparse.OptionGroup(probe_parser, 'foo')
      try:
          probe_group.add_option('-t')
      except TypeError:
          real_add_option = optparse.OptionGroup.add_option

          def _ascii_bytes(v):
              if isinstance(v, compat_str):
                  return v.encode('ascii', 'replace')
              return v

          def _compat_add_option(self, *args, **kwargs):
              bargs = [_ascii_bytes(a) for a in args]
              bkwargs = dict((k, _ascii_bytes(v)) for k, v in kwargs.items())
              return real_add_option(self, *bargs, **bkwargs)

          optparse.OptionGroup.add_option = _compat_add_option
  if hasattr(shutil, 'get_terminal_size'):  # Python >= 3.3
      compat_get_terminal_size = shutil.get_terminal_size
  else:
      _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])

      def _terminal_dimension_from_env(name):
          """Return environment variable ``name`` as an int, or None when it is
          unset, empty or not a valid integer."""
          raw = compat_getenv(name)
          if not raw:
              return None
          try:
              return int(raw)
          except ValueError:
              # A malformed COLUMNS/LINES must not crash the caller; treat it
              # as missing, as shutil.get_terminal_size() does.
              return None

      def compat_get_terminal_size(fallback=(80, 24)):
          """Return the terminal size as a (columns, lines) named tuple.

          COLUMNS and LINES from the environment are used when they hold
          positive integers. A missing or non-positive dimension is taken
          from ``stty size``; if that cannot be run or parsed, ``fallback``
          (a (columns, lines) pair) is used instead.
          """
          columns = _terminal_dimension_from_env('COLUMNS')
          lines = _terminal_dimension_from_env('LINES')

          if columns is None or lines is None or columns <= 0 or lines <= 0:
              try:
                  sp = subprocess.Popen(
                      ['stty', 'size'],
                      stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                  out, err = sp.communicate()
                  # stty prints "<rows> <columns>".
                  _lines, _columns = map(int, out.split())
              except Exception:
                  # Deliberately broad: any failure here means "use fallback".
                  _columns, _lines = _terminal_size(*fallback)

              if columns is None or columns <= 0:
                  columns = _columns
              if lines is None or lines <= 0:
                  lines = _lines
          return _terminal_size(columns, lines)
  try:
      # Probe: does itertools.count() accept the start/step keywords?
      itertools.count(start=0, step=1)
  except TypeError:  # Python 2.6
      def compat_itertools_count(start=0, step=1):
          """Yield start, start + step, start + 2 * step, ... without end."""
          current = start
          while True:
              yield current
              current += step
  else:
      compat_itertools_count = itertools.count
  505. if sys.version_info >= (3, 0):
  506. from tokenize import tokenize as compat_tokenize_tokenize
  507. else:
  508. from tokenize import generate_tokens as compat_tokenize_tokenize
  try:
      # Probe: does struct accept a text format string?
      struct.pack('!I', 0)
  except TypeError:
      # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
      # See https://bugs.python.org/issue19099
      def compat_struct_pack(spec, *args):
          """struct.pack() that encodes a text format string as ASCII first."""
          fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
          return struct.pack(fmt, *args)

      def compat_struct_unpack(spec, *args):
          """struct.unpack() that encodes a text format string as ASCII first."""
          fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
          return struct.unpack(fmt, *args)
  else:
      compat_struct_pack = struct.pack
      compat_struct_unpack = struct.unpack
  525. __all__ = [
  526. 'compat_HTMLParser',
  527. 'compat_HTTPError',
  528. 'compat_basestring',
  529. 'compat_chr',
  530. 'compat_cookiejar',
  531. 'compat_cookies',
  532. 'compat_etree_fromstring',
  533. 'compat_expanduser',
  534. 'compat_get_terminal_size',
  535. 'compat_getenv',
  536. 'compat_getpass',
  537. 'compat_html_entities',
  538. 'compat_http_client',
  539. 'compat_http_server',
  540. 'compat_input',
  541. 'compat_itertools_count',
  542. 'compat_kwargs',
  543. 'compat_ord',
  544. 'compat_os_name',
  545. 'compat_parse_qs',
  546. 'compat_print',
  547. 'compat_setenv',
  548. 'compat_shlex_quote',
  549. 'compat_shlex_split',
  550. 'compat_socket_create_connection',
  551. 'compat_str',
  552. 'compat_struct_pack',
  553. 'compat_struct_unpack',
  554. 'compat_subprocess_get_DEVNULL',
  555. 'compat_tokenize_tokenize',
  556. 'compat_urllib_error',
  557. 'compat_urllib_parse',
  558. 'compat_urllib_parse_unquote',
  559. 'compat_urllib_parse_unquote_plus',
  560. 'compat_urllib_parse_unquote_to_bytes',
  561. 'compat_urllib_parse_urlencode',
  562. 'compat_urllib_parse_urlparse',
  563. 'compat_urllib_request',
  564. 'compat_urllib_request_DataHandler',
  565. 'compat_urllib_response',
  566. 'compat_urlparse',
  567. 'compat_urlretrieve',
  568. 'compat_xml_parse_error',
  569. 'compat_xpath',
  570. 'workaround_optparse_bug9161',
  571. ]