Split code as a package, compiled into an executable zip

13 years ago · d77c3dfd02
--- a/+ 4
+++ b/+ 4
@ -18,6 +18,9 @@ update-readme:
 		echo "$${footer}" >> README.md

 compile:
 	cp youtube_dl/__init__.py youtube-dl
 	zip --junk-paths youtube-dl youtube_dl/*.py
 	echo '#!/usr/bin/env python' > youtube-dl
 	cat youtube-dl.zip >> youtube-dl
 	rm youtube-dl.zip

 .PHONY: default compile update update-latest update-readme
--- a/BIN
+++ b/BIN
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@ -0,0 +1,681 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-

 import httplib
 import math
 import os
 import re
 import socket
 import subprocess
 import sys
 import time
 import urllib2

 if os.name == 'nt':
 	import ctypes
 	
 from Utils import *


 class FileDownloader(object):
 	"""File Downloader class.

 	File downloader objects are the ones responsible for downloading the
 	actual video file and writing it to disk if the user has requested
 	it, among some other tasks. In most cases there should be one per
 	program. As, given a video URL, the downloader doesn't know how to
 	extract all the needed information, task that InfoExtractors do, it
 	has to pass the URL to one of them.

 	For this, file downloader objects have a method that allows
 	InfoExtractors to be registered in a given order. When it is passed
 	a URL, the file downloader hands it to the first InfoExtractor it
 	finds that reports being able to handle it. The InfoExtractor extracts
 	all the information about the video or videos the URL refers to, and
 	asks the FileDownloader to process the video information, possibly
 	downloading the video.

 	File downloaders accept a lot of parameters. In order not to saturate
 	the object constructor with arguments, it receives a dictionary of
 	options instead. These options are available through the params
 	attribute for the InfoExtractors to use. The FileDownloader also
 	registers itself as the downloader in charge for the InfoExtractors
 	that are added to it, so this is a "mutual registration".

 	Available options:

 	username:         Username for authentication purposes.
 	password:         Password for authentication purposes.
 	usenetrc:         Use netrc for authentication instead.
 	quiet:            Do not print messages to stdout.
 	forceurl:         Force printing final URL.
 	forcetitle:       Force printing title.
 	forcethumbnail:   Force printing thumbnail URL.
 	forcedescription: Force printing description.
 	forcefilename:    Force printing final filename.
 	simulate:         Do not download the video files.
 	format:           Video format code.
 	format_limit:     Highest quality format to try.
 	outtmpl:          Template for output names.
 	ignoreerrors:     Do not stop on download errors.
 	ratelimit:        Download speed limit, in bytes/sec.
 	nooverwrites:     Prevent overwriting files.
 	retries:          Number of times to retry for HTTP error 5xx
 	continuedl:       Try to continue downloads if possible.
 	noprogress:       Do not print the progress bar.
 	playliststart:    Playlist item to start at.
 	playlistend:      Playlist item to end at.
 	matchtitle:       Download only matching titles.
 	rejecttitle:      Reject downloads for matching titles.
 	logtostderr:      Log messages to stderr instead of stdout.
 	consoletitle:     Display progress in console window's titlebar.
 	nopart:           Do not use temporary .part files.
 	updatetime:       Use the Last-modified header to set output file timestamps.
 	writedescription: Write the video description to a .description file
 	writeinfojson:    Write the video metadata to a .info.json file
 	writesubtitles:   Write the video subtitles to a .srt file
 	subtitleslang:    Language of the subtitles to download
 	"""

 	# Class-level defaults; __init__ rebinds every one of them on the instance.
 	params = None  # options dictionary handed to the constructor
 	_ies = []  # registered InfoExtractors, in registration order
 	_pps = []  # registered PostProcessors, in chain order
 	_download_retcode = None  # value returned by download(); trouble() sets it to 1
 	_num_downloads = None  # counter advanced by increment_downloads()
 	_screen_file = None  # stream that to_screen() writes to

 	def __init__(self, params):
 		"""Create a FileDownloader object with the given options."""
 		self._ies = []
 		self._pps = []
 		self._download_retcode = 0
 		self._num_downloads = 0
 		self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 		self.params = params

 	@staticmethod
 	def format_bytes(bytes):
 		if bytes is None:
 			return 'N/A'
 		if type(bytes) is str:
 			bytes = float(bytes)
 		if bytes == 0.0:
 			exponent = 0
 		else:
 			exponent = long(math.log(bytes, 1024.0))
 		suffix = 'bkMGTPEZY'[exponent]
 		converted = float(bytes) / float(1024 ** exponent)
 		return '%.2f%s' % (converted, suffix)

 	@staticmethod
 	def calc_percent(byte_counter, data_len):
 		if data_len is None:
 			return '---.-%'
 		return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))

 	@staticmethod
 	def calc_eta(start, now, total, current):
 		if total is None:
 			return '--:--'
 		dif = now - start
 		if current == 0 or dif < 0.001: # One millisecond
 			return '--:--'
 		rate = float(current) / dif
 		eta = long((float(total) - float(current)) / rate)
 		(eta_mins, eta_secs) = divmod(eta, 60)
 		if eta_mins > 99:
 			return '--:--'
 		return '%02d:%02d' % (eta_mins, eta_secs)

 	@staticmethod
 	def calc_speed(start, now, bytes):
 		dif = now - start
 		if bytes == 0 or dif < 0.001: # One millisecond
 			return '%10s' % '---b/s'
 		return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))

 	@staticmethod
 	def best_block_size(elapsed_time, bytes):
 		new_min = max(bytes / 2.0, 1.0)
 		new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 		if elapsed_time < 0.001:
 			return long(new_max)
 		rate = bytes / elapsed_time
 		if rate > new_max:
 			return long(new_max)
 		if rate < new_min:
 			return long(new_min)
 		return long(rate)

 	@staticmethod
 	def parse_bytes(bytestr):
 		"""Parse a string indicating a byte quantity into a long integer."""
 		matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
 		if matchobj is None:
 			return None
 		number = float(matchobj.group(1))
 		multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
 		return long(round(number * multiplier))

 	def add_info_extractor(self, ie):
 		"""Add an InfoExtractor object to the end of the list."""
 		self._ies.append(ie)
 		ie.set_downloader(self)

 	def add_post_processor(self, pp):
 		"""Add a PostProcessor object to the end of the chain."""
 		self._pps.append(pp)
 		pp.set_downloader(self)

 	def to_screen(self, message, skip_eol=False):
 		"""Write a unicode message to the screen stream unless in quiet mode.

 		A newline is appended unless skip_eol is true; the stream is flushed
 		after every message.
 		"""
 		assert type(message) == type(u'')
 		if self.params.get('quiet', False):
 			return
 		output = message + [u'\n', u''][skip_eol]

 		# Python 2 lies about the mode of sys.stdout/sys.stderr
 		if 'b' not in self._screen_file.mode or sys.version_info[0] < 3:
 			output = output.encode(preferredencoding(), 'ignore')
 		self._screen_file.write(output)
 		self._screen_file.flush()

 	def to_stderr(self, message):
 		"""Print the message on stderr, encoded with the preferred encoding."""
 		encoded_message = message.encode(preferredencoding())
 		print >>sys.stderr, encoded_message

 	def to_cons_title(self, message):
 		"""Set console/terminal window title to message."""
 		if not self.params.get('consoletitle', False):
 			return
 		if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 			# c_wchar_p() might not be necessary if `message` is
 			# already of type unicode()
 			ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 		elif 'TERM' in os.environ:
 			sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))

 	def fixed_template(self):
 		"""Checks if the output template is fixed."""
 		return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)

 	def trouble(self, message=None):
 		"""Determine action to take when a download problem appears.

 		Depending on if the downloader has been configured to ignore
 		download errors or not, this method may throw an exception or
 		not when errors are found, after printing the message.
 		"""
 		if message is not None:
 			self.to_stderr(message)
 		if not self.params.get('ignoreerrors', False):
 			raise DownloadError(message)
 		self._download_retcode = 1

 	def slow_down(self, start_time, byte_counter):
 		"""Sleep if the download speed is over the rate limit."""
 		rate_limit = self.params.get('ratelimit', None)
 		if rate_limit is None or byte_counter == 0:
 			return
 		now = time.time()
 		elapsed = now - start_time
 		if elapsed <= 0.0:
 			return
 		speed = float(byte_counter) / elapsed
 		if speed > rate_limit:
 			time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)

 	def temp_name(self, filename):
 		"""Returns a temporary filename for the given filename."""
 		if self.params.get('nopart', False) or filename == u'-' or \
 				(os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
 			return filename
 		return filename + u'.part'

 	def undo_temp_name(self, filename):
 		if filename.endswith(u'.part'):
 			return filename[:-len(u'.part')]
 		return filename

 	def try_rename(self, old_filename, new_filename):
 		try:
 			if old_filename == new_filename:
 				return
 			os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
 		except (IOError, OSError), err:
 			self.trouble(u'ERROR: unable to rename file')

 	def try_utime(self, filename, last_modified_hdr):
 		"""Try to set the last-modified time of the given file.

 		last_modified_hdr is the value of a Last-modified header, or None.
 		Returns the timestamp that timeconvert() produced for the header,
 		or None when there is no header, the file does not exist, or
 		timeconvert() returns None. Failing to apply the timestamp to the
 		file is deliberately not treated as an error.
 		"""
 		if last_modified_hdr is None:
 			return
 		encoded_filename = encodeFilename(filename)
 		if not os.path.isfile(encoded_filename):
 			return
 		filetime = timeconvert(last_modified_hdr)
 		if filetime is None:
 			return filetime
 		try:
 			# Use the same encoded name as the isfile() check above
 			os.utime(encoded_filename, (time.time(), filetime))
 		except Exception:
 			# Best effort only, but let KeyboardInterrupt/SystemExit through
 			pass
 		return filetime

 	def report_writedescription(self, descfn):
 		""" Report that the description file is being written """
 		self.to_screen(u'[info] Writing video description to: ' + descfn)

 	def report_writesubtitles(self, srtfn):
 		""" Report that the subtitles file is being written """
 		self.to_screen(u'[info] Writing video subtitles to: ' + srtfn)

 	def report_writeinfojson(self, infofn):
 		""" Report that the metadata file has been written """
 		self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)

 	def report_destination(self, filename):
 		"""Report destination filename."""
 		self.to_screen(u'[download] Destination: ' + filename)

 	def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
 		"""Report download progress."""
 		if self.params.get('noprogress', False):
 			return
 		self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
 				(percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
 		self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
 				(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))

 	def report_resuming_byte(self, resume_len):
 		"""Report attempt to resume at given byte."""
 		self.to_screen(u'[download] Resuming download at byte %s' % resume_len)

 	def report_retry(self, count, retries):
 		"""Report retry in case of HTTP error 5xx"""
 		self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))

 	def report_file_already_downloaded(self, file_name):
 		"""Report file has already been fully downloaded."""
 		try:
 			self.to_screen(u'[download] %s has already been downloaded' % file_name)
 		except (UnicodeEncodeError), err:
 			self.to_screen(u'[download] The file has already been downloaded')

 	def report_unable_to_resume(self):
 		"""Report it was impossible to resume download."""
 		self.to_screen(u'[download] Unable to resume')

 	def report_finish(self):
 		"""Report download finished."""
 		if self.params.get('noprogress', False):
 			self.to_screen(u'[download] Download completed')
 		else:
 			self.to_screen(u'')

 	def increment_downloads(self):
 		"""Increment the ordinal that assigns a number to each file."""
 		self._num_downloads += 1

 	def prepare_filename(self, info_dict):
 		"""Generate the output filename."""
 		try:
 			template_dict = dict(info_dict)
 			template_dict['epoch'] = unicode(long(time.time()))
 			template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
 			filename = self.params['outtmpl'] % template_dict
 			return filename
 		except (ValueError, KeyError), err:
 			self.trouble(u'ERROR: invalid system charset or erroneous output template')
 			return None

 	def _match_entry(self, info_dict):
 		""" Returns None iff the file should be downloaded """

 		title = info_dict['title']
 		matchtitle = self.params.get('matchtitle', False)
 		if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
 			return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
 		rejecttitle = self.params.get('rejecttitle', False)
 		if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
 			return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 		return None

 	def process_info(self, info_dict):
 		"""Process a single dictionary returned by an InfoExtractor."""

 		reason = self._match_entry(info_dict)
 		if reason is not None:
 			self.to_screen(u'[download] ' + reason)
 			return

 		max_downloads = self.params.get('max_downloads')
 		if max_downloads is not None:
 			if self._num_downloads > int(max_downloads):
 				raise MaxDownloadsReached()

 		filename = self.prepare_filename(info_dict)
 		
 		# Forced printings
 		if self.params.get('forcetitle', False):
 			print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
 		if self.params.get('forceurl', False):
 			print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
 		if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
 			print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
 		if self.params.get('forcedescription', False) and 'description' in info_dict:
 			print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
 		if self.params.get('forcefilename', False) and filename is not None:
 			print filename.encode(preferredencoding(), 'xmlcharrefreplace')
 		if self.params.get('forceformat', False):
 			print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace')

 		# Do nothing else if in simulate mode
 		if self.params.get('simulate', False):
 			return

 		if filename is None:
 			return

 		try:
 			dn = os.path.dirname(encodeFilename(filename))
 			if dn != '' and not os.path.exists(dn): # dn is already encoded
 				os.makedirs(dn)
 		except (OSError, IOError), err:
 			self.trouble(u'ERROR: unable to create directory ' + unicode(err))
 			return

 		if self.params.get('writedescription', False):
 			try:
 				descfn = filename + u'.description'
 				self.report_writedescription(descfn)
 				descfile = open(encodeFilename(descfn), 'wb')
 				try:
 					descfile.write(info_dict['description'].encode('utf-8'))
 				finally:
 					descfile.close()
 			except (OSError, IOError):
 				self.trouble(u'ERROR: Cannot write description file ' + descfn)
 				return
 				
 		if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
 			# subtitles download errors are already managed as troubles in relevant IE
 			# that way it will silently go on when used with unsupporting IE 
 			try:
 				srtfn = filename.rsplit('.', 1)[0] + u'.srt'
 				self.report_writesubtitles(srtfn)
 				srtfile = open(encodeFilename(srtfn), 'wb')
 				try:
 					srtfile.write(info_dict['subtitles'].encode('utf-8'))
 				finally:
 					srtfile.close()
 			except (OSError, IOError):
 				self.trouble(u'ERROR: Cannot write subtitles file ' + descfn)
 				return

 		if self.params.get('writeinfojson', False):
 			infofn = filename + u'.info.json'
 			self.report_writeinfojson(infofn)
 			try:
 				json.dump
 			except (NameError,AttributeError):
 				self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
 				return
 			try:
 				infof = open(encodeFilename(infofn), 'wb')
 				try:
 					json_info_dict = dict((k,v) for k,v in info_dict.iteritems() if not k in ('urlhandle',))
 					json.dump(json_info_dict, infof)
 				finally:
 					infof.close()
 			except (OSError, IOError):
 				self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
 				return

 		if not self.params.get('skip_download', False):
 			if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 				success = True
 			else:
 				try:
 					success = self._do_download(filename, info_dict)
 				except (OSError, IOError), err:
 					raise UnavailableVideoError
 				except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 					self.trouble(u'ERROR: unable to download video data: %s' % str(err))
 					return
 				except (ContentTooShortError, ), err:
 					self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 					return
 	
 			if success:
 				try:
 					self.post_process(filename, info_dict)
 				except (PostProcessingError), err:
 					self.trouble(u'ERROR: postprocessing: %s' % str(err))
 					return

 	def download(self, url_list):
 		"""Download a given list of URLs."""
 		if len(url_list) > 1 and self.fixed_template():
 			raise SameFileError(self.params['outtmpl'])

 		for url in url_list:
 			suitable_found = False
 			for ie in self._ies:
 				# Go to next InfoExtractor if not suitable
 				if not ie.suitable(url):
 					continue

 				# Suitable InfoExtractor found
 				suitable_found = True

 				# Extract information from URL and process it
 				ie.extract(url)

 				# Suitable InfoExtractor had been found; go to next URL
 				break

 			if not suitable_found:
 				self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)

 		return self._download_retcode

 	def post_process(self, filename, ie_info):
 		"""Run the postprocessing chain on the given file."""
 		info = dict(ie_info)
 		info['filepath'] = filename
 		for pp in self._pps:
 			info = pp.run(info)
 			if info is None:
 				break

 	def _download_with_rtmpdump(self, filename, url, player_url):
 		"""Download an RTMP stream by driving the external rtmpdump program.

 		The data goes to the temporary name of filename, which is renamed
 		to filename once rtmpdump exits with code 0. player_url, when not
 		None, is passed to rtmpdump with -W.

 		Returns True when the download is considered complete, False after
 		reporting a problem through trouble().
 		"""
 		self.report_destination(filename)
 		tmpfilename = self.temp_name(filename)

 		# Check for rtmpdump first
 		try:
 			devnull = open(os.path.devnull, 'w')
 			try:
 				subprocess.call(['rtmpdump', '-h'], stdout=devnull, stderr=subprocess.STDOUT)
 			finally:
 				# Do not leave the null device open after the probe
 				devnull.close()
 		except (OSError, IOError):
 			self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
 			return False

 		# Download using rtmpdump. rtmpdump returns exit code 2 when
 		# the connection was interrupted and resuming appears to be
 		# possible. This is part of rtmpdump's normal usage, AFAIK.
 		basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
 		args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
 		if self.params.get('verbose', False):
 			try:
 				import pipes
 				shell_quote = lambda args: ' '.join(map(pipes.quote, args))
 			except ImportError:
 				shell_quote = repr
 			self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
 		retval = subprocess.call(args)
 		while retval == 2 or retval == 1:
 			prevsize = os.path.getsize(encodeFilename(tmpfilename))
 			self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
 			time.sleep(5.0) # This seems to be needed
 			# Resume with -e; after an exit code 1 also pass -k 1
 			retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
 			cursize = os.path.getsize(encodeFilename(tmpfilename))
 			# No progress after an exit code 1: give up
 			if prevsize == cursize and retval == 1:
 				break
 			# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
 			if prevsize == cursize and retval == 2 and cursize > 1024:
 				self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
 				retval = 0
 				break
 		if retval == 0:
 			self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(encodeFilename(tmpfilename)))
 			self.try_rename(tmpfilename, filename)
 			return True
 		else:
 			self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
 			return False

 	def _do_download(self, filename, info_dict):
 		url = info_dict['url']
 		player_url = info_dict.get('player_url', None)

 		# Check file already present
 		if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
 			self.report_file_already_downloaded(filename)
 			return True

 		# Attempt to download using rtmpdump
 		if url.startswith('rtmp'):
 			return self._download_with_rtmpdump(filename, url, player_url)

 		tmpfilename = self.temp_name(filename)
 		stream = None

 		# Do not include the Accept-Encoding header
 		headers = {'Youtubedl-no-compression': 'True'}
 		basic_request = urllib2.Request(url, None, headers)
 		request = urllib2.Request(url, None, headers)

 		# Establish possible resume length
 		if os.path.isfile(encodeFilename(tmpfilename)):
 			resume_len = os.path.getsize(encodeFilename(tmpfilename))
 		else:
 			resume_len = 0

 		open_mode = 'wb'
 		if resume_len != 0:
 			if self.params.get('continuedl', False):
 				self.report_resuming_byte(resume_len)
 				request.add_header('Range','bytes=%d-' % resume_len)
 				open_mode = 'ab'
 			else:
 				resume_len = 0

 		count = 0
 		retries = self.params.get('retries', 0)
 		while count <= retries:
 			# Establish connection
 			try:
 				if count == 0 and 'urlhandle' in info_dict:
 					data = info_dict['urlhandle']
 				data = urllib2.urlopen(request)
 				break
 			except (urllib2.HTTPError, ), err:
 				if (err.code < 500 or err.code >= 600) and err.code != 416:
 					# Unexpected HTTP error
 					raise
 				elif err.code == 416:
 					# Unable to resume (requested range not satisfiable)
 					try:
 						# Open the connection again without the range header
 						data = urllib2.urlopen(basic_request)
 						content_length = data.info()['Content-Length']
 					except (urllib2.HTTPError, ), err:
 						if err.code < 500 or err.code >= 600:
 							raise
 					else:
 						# Examine the reported length
 						if (content_length is not None and
 								(resume_len - 100 < long(content_length) < resume_len + 100)):
 							# The file had already been fully downloaded.
 							# Explanation to the above condition: in issue #175 it was revealed that
 							# YouTube sometimes adds or removes a few bytes from the end of the file,
 							# changing the file size slightly and causing problems for some users. So
 							# I decided to implement a suggested change and consider the file
 							# completely downloaded if the file size differs less than 100 bytes from
 							# the one in the hard drive.
 							self.report_file_already_downloaded(filename)
 							self.try_rename(tmpfilename, filename)
 							return True
 						else:
 							# The length does not match, we start the download over
 							self.report_unable_to_resume()
 							open_mode = 'wb'
 							break
 			# Retry
 			count += 1
 			if count <= retries:
 				self.report_retry(count, retries)

 		if count > retries:
 			self.trouble(u'ERROR: giving up after %s retries' % retries)
 			return False

 		data_len = data.info().get('Content-length', None)
 		if data_len is not None:
 			data_len = long(data_len) + resume_len
 		data_len_str = self.format_bytes(data_len)
 		byte_counter = 0 + resume_len
 		block_size = 1024
 		start = time.time()
 		while True:
 			# Download and write
 			before = time.time()
 			data_block = data.read(block_size)
 			after = time.time()
 			if len(data_block) == 0:
 				break
 			byte_counter += len(data_block)

 			# Open file just in time
 			if stream is None:
 				try:
 					(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
 					assert stream is not None
 					filename = self.undo_temp_name(tmpfilename)
 					self.report_destination(filename)
 				except (OSError, IOError), err:
 					self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
 					return False
 			try:
 				stream.write(data_block)
 			except (IOError, OSError), err:
 				self.trouble(u'\nERROR: unable to write data: %s' % str(err))
 				return False
 			block_size = self.best_block_size(after - before, len(data_block))

 			# Progress message
 			speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
 			if data_len is None:
 				self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
 			else:
 				percent_str = self.calc_percent(byte_counter, data_len)
 				eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
 				self.report_progress(percent_str, data_len_str, speed_str, eta_str)

 			# Apply rate limit
 			self.slow_down(start, byte_counter - resume_len)

 		if stream is None:
 			self.trouble(u'\nERROR: Did not get any data blocks')
 			return False
 		stream.close()
 		self.report_finish()
 		if data_len is not None and byte_counter != data_len:
 			raise ContentTooShortError(byte_counter, long(data_len))
 		self.try_rename(tmpfilename, filename)

 		# Update file modification time
 		if self.params.get('updatetime', True):
 			info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))

 		return True
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
--- a/youtube_dl/PostProcessing.py
+++ b/youtube_dl/PostProcessing.py
@ -0,0 +1,185 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-

 import os
 import subprocess
 import sys
 import time

 from Utils import *


 class PostProcessor(object):
 	"""Base class of the postprocessing steps.

 	A downloader keeps a chain of PostProcessor objects, filled through
 	its add_post_processor() method. After a successful download it calls
 	run() on every member of the chain in turn: the first one gets the
 	initial information, each following one gets whatever the previous
 	one returned.

 	The chain ends when its last member has run or as soon as one member
 	returns None.

 	As with InfoExtractor objects, registration is mutual: the
 	PostProcessor is told which downloader it belongs to.
 	"""

 	# Downloader this PostProcessor is registered with, if any
 	_downloader = None

 	def __init__(self, downloader=None):
 		self._downloader = downloader

 	def set_downloader(self, downloader):
 		"""Remember the downloader this PP belongs to."""
 		self._downloader = downloader

 	def run(self, information):
 		"""Perform the postprocessing step.

 		"information" is a dictionary like those built by InfoExtractors,
 		with one extra field, "filepath", naming the downloaded file.

 		Returning None stops the postprocessing chain. Returning an
 		information dictionary, possibly the received one with some
 		fields changed, passes it on to the next object in the chain.

 		A PostProcessingError may be raised as well; the calling
 		downloader takes it into account.
 		"""
 		# The base implementation passes the information on untouched
 		return information

 class AudioConversionError(Exception):
 	"""Error raised when converting audio with ffmpeg fails.

 	The human-readable reason is kept in the "message" attribute.
 	"""
 	# Derives from Exception rather than BaseException, which is meant for
 	# interpreter-level events such as KeyboardInterrupt and SystemExit.
 	def __init__(self, message):
 		Exception.__init__(self, message)
 		self.message = message

 class FFmpegExtractAudioPP(PostProcessor):

 	def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=False):
 		"""Store the conversion preferences; a codec of None means 'best'."""
 		PostProcessor.__init__(self, downloader)
 		if preferredcodec is None:
 			self._preferredcodec = 'best'
 		else:
 			self._preferredcodec = preferredcodec
 		self._preferredquality = preferredquality
 		self._keepvideo = keepvideo

 	@staticmethod
 	def get_audio_codec(path):
 		"""Return the audio codec name that ffprobe reports for the file.

 		Returns None when ffprobe cannot be run, exits with a non-zero
 		code, or reports no audio stream.
 		"""
 		try:
 			cmd = ['ffprobe', '-show_streams', '--', encodeFilename(path)]
 			devnull = open(os.path.devnull, 'w')
 			try:
 				handle = subprocess.Popen(cmd, stderr=devnull, stdout=subprocess.PIPE)
 				output = handle.communicate()[0]
 			finally:
 				# Do not leave the null device open
 				devnull.close()
 			if handle.wait() != 0:
 				return None
 		except (IOError, OSError):
 			return None
 		audio_codec = None
 		for line in output.split('\n'):
 			# Remember the latest codec_name; it is the answer once the
 			# codec_type=audio line is reached
 			if line.startswith('codec_name='):
 				audio_codec = line.split('=')[1].strip()
 			elif line.strip() == 'codec_type=audio' and audio_codec is not None:
 				return audio_codec
 		return None

 	@staticmethod
 	def run_ffmpeg(path, out_path, codec, more_opts):
 		"""Run ffmpeg to write the audio of path to out_path.

 		codec is the value for -acodec (None leaves the option out) and
 		more_opts is a list of further ffmpeg arguments.

 		Raises AudioConversionError when ffmpeg is not found or exits with
 		a non-zero code; any other IOError/OSError from starting ffmpeg is
 		re-raised unchanged.
 		"""
 		if codec is None:
 			acodec_opts = []
 		else:
 			acodec_opts = ['-acodec', codec]
 		cmd = ['ffmpeg', '-y', '-i', encodeFilename(path), '-vn'] + acodec_opts + more_opts + ['--', encodeFilename(out_path)]
 		try:
 			p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 			stdout,stderr = p.communicate()
 		except (IOError, OSError):
 			e = sys.exc_info()[1]
 			# errno 2: no such file, i.e. the ffmpeg program is missing
 			if isinstance(e, OSError) and e.errno == 2:
 				raise AudioConversionError('ffmpeg not found. Please install ffmpeg.')
 			else:
 				# Bare raise keeps the original traceback
 				raise
 		if p.returncode != 0:
 			# The last line of ffmpeg's stderr is used as the error message
 			msg = stderr.strip().split('\n')[-1]
 			raise AudioConversionError(msg)

 	def run(self, information):
 		"""Extract the audio of information['filepath'] into a new file.

 		The target codec follows the preferred codec given to the
 		constructor; when the file's own codec already fits, the audio is
 		copied instead of being re-encoded. Unless keepvideo was set, the
 		original file is removed afterwards.

 		Returns the information dictionary with 'filepath' pointing to the
 		audio file, or None (which stops the postprocessing chain) when
 		the codec cannot be determined, the conversion fails or the
 		original file cannot be removed.
 		"""
 		path = information['filepath']

 		filecodec = self.get_audio_codec(path)
 		if filecodec is None:
 			self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
 			return None

 		more_opts = []
 		if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
 			if self._preferredcodec == 'm4a' and filecodec == 'aac':
 				# Lossless, but in another container
 				acodec = 'copy'
 				extension = self._preferredcodec
 				more_opts = ['-absf', 'aac_adtstoasc']
 			elif filecodec in ['aac', 'mp3', 'vorbis']:
 				# Lossless if possible
 				acodec = 'copy'
 				extension = filecodec
 				if filecodec == 'aac':
 					more_opts = ['-f', 'adts']
 				if filecodec == 'vorbis':
 					extension = 'ogg'
 			else:
 				# MP3 otherwise.
 				acodec = 'libmp3lame'
 				extension = 'mp3'
 				more_opts = []
 				if self._preferredquality is not None:
 					more_opts += ['-ab', self._preferredquality]
 		else:
 			# We convert the audio (lossy)
 			acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
 			extension = self._preferredcodec
 			more_opts = []
 			if self._preferredquality is not None:
 				more_opts += ['-ab', self._preferredquality]
 			if self._preferredcodec == 'aac':
 				more_opts += ['-f', 'adts']
 			if self._preferredcodec == 'm4a':
 				more_opts += ['-absf', 'aac_adtstoasc']
 			if self._preferredcodec == 'vorbis':
 				extension = 'ogg'
 			if self._preferredcodec == 'wav':
 				extension = 'wav'
 				more_opts += ['-f', 'wav']

 		prefix, sep, ext = path.rpartition(u'.') # not os.path.splitext, since the latter does not work on unicode in all setups
 		if not sep or os.sep in ext or (os.altsep is not None and os.altsep in ext):
 			# The last path component has no extension: append one instead
 			# of cutting the path at an unrelated dot
 			prefix, sep = path, u'.'
 		new_path = prefix + sep + extension
 		self._downloader.to_screen(u'[ffmpeg] Destination: ' + new_path)
 		try:
 			self.run_ffmpeg(path, new_path, acodec, more_opts)
 		except AudioConversionError:
 			e = sys.exc_info()[1]
 			self._downloader.to_stderr(u'ERROR: audio conversion failed: ' + e.message)
 			return None
 		except Exception:
 			self._downloader.to_stderr(u'ERROR: error running ffmpeg')
 			return None

 		# Try to update the date time for extracted audio file.
 		if information.get('filetime') is not None:
 			try:
 				os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
 			except Exception:
 				self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')

 		# Never delete the source when the audio was written under the
 		# very same name: that would delete the result
 		if not self._keepvideo and new_path != path:
 			try:
 				os.remove(encodeFilename(path))
 			except (IOError, OSError):
 				self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
 				return None

 		information['filepath'] = new_path
 		return information
--- a/youtube_dl/Utils.py
+++ b/youtube_dl/Utils.py
@ -0,0 +1,375 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-

 import gzip
 import htmlentitydefs
 import HTMLParser
 import locale
 import os
 import re
 import sys
 import zlib
 import urllib2
 import email.utils

 try:
 	import cStringIO as StringIO
 except ImportError:
 	import StringIO

 # Default HTTP headers. YoutubeDLHandler.http_request() sets every one of
 # them on each outgoing request, replacing any value the request already
 # carried under the same key.
 std_headers = {
 	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
 	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
 	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
 	'Accept-Encoding': 'gzip, deflate',
 	'Accept-Language': 'en-us,en;q=0.5',
 }

 def preferredencoding():
 	"""Get preferred encoding.

 	Returns the best encoding scheme for the system, based on
 	locale.getpreferredencoding(). Falls back to 'UTF-8' when the locale
 	lookup fails or reports a codec that cannot be used to encode text.
 	"""
 	try:
 		pref = locale.getpreferredencoding()
 		# Check that the reported codec really exists and is usable
 		u'TEST'.encode(pref)
 	except Exception:
 		pref = 'UTF-8'
 	return pref


 def htmlentity_transform(matchobj):
 	"""Transforms an HTML entity to a Unicode character.

 	This function receives a match object and is intended to be used with
 	the re.sub() function.
 	"""
 	entity = matchobj.group(1)

 	# Known non-numeric HTML entity
 	if entity in htmlentitydefs.name2codepoint:
 		return unichr(htmlentitydefs.name2codepoint[entity])

 	# Unicode character
 	mobj = re.match(ur'(?u)#(x?\d+)', entity)
 	if mobj is not None:
 		numstr = mobj.group(1)
 		if numstr.startswith(u'x'):
 			base = 16
 			numstr = u'0%s' % numstr
 		else:
 			base = 10
 		return unichr(long(numstr, base))

 	# Unknown entity in name, return its literal representation
 	return (u'&%s;' % entity)


 def sanitize_title(utitle):
 	"""Sanitizes a video title so it could be used as part of a filename."""
 	utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
 	return utitle.replace(unicode(os.sep), u'%')


 def sanitize_open(filename, open_mode):
 	"""Try to open the given filename, and slightly tweak it if this fails.

 	Attempts to open the given filename. If this fails, it tries to change
 	the filename slightly, step by step, until it's either able to open it
 	or it fails and raises a final exception, like the standard open()
 	function.

 	It returns the tuple (stream, definitive_file_name).
 	"""
 	try:
 		if filename == u'-':
 			if sys.platform == 'win32':
 				import msvcrt
 				msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
 			return (sys.stdout, filename)
 		stream = open(encodeFilename(filename), open_mode)
 		return (stream, filename)
 	except (IOError, OSError), err:
 		# In case of error, try to remove win32 forbidden chars
 		filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)

 		# An exception here should be caught in the caller
 		stream = open(encodeFilename(filename), open_mode)
 		return (stream, filename)


 def timeconvert(timestr):
 	"""Convert RFC 2822 defined time string into system timestamp.

 	Returns None when email.utils.parsedate_tz() cannot parse the string.
 	"""
 	parsed = email.utils.parsedate_tz(timestr)
 	if parsed is None:
 		return None
 	return email.utils.mktime_tz(parsed)

 def simplify_title(title):
 	expr = re.compile(ur'[^\w\d_\-]+', flags=re.UNICODE)
 	return expr.sub(u'_', title).strip(u'_')

 def orderedSet(iterable):
 	"""Return a list of the elements of iterable without duplicates.

 	The first occurrence of every element is kept, in its original
 	position. Elements are compared with ==, so they need not be hashable.
 	"""
 	unique = []
 	for item in iterable:
 		if item in unique:
 			continue
 		unique.append(item)
 	return unique

 def unescapeHTML(s):
 	"""
 	@param s a string (of type unicode)
 	"""
 	assert type(s) == type(u'')

 	htmlParser = HTMLParser.HTMLParser()
 	return htmlParser.unescape(s)

 def encodeFilename(s):
 	"""
 	Prepare a file name for use with the file system functions.

 	@param s The name of the file (of type unicode)
 	@return s itself on Windows 2000 and up, otherwise s encoded with the
 	        file system encoding (characters that cannot be encoded are
 	        dropped)
 	"""

 	assert type(s) == type(u'')

 	# Index the version tuple rather than reading .major: the named
 	# attributes only exist on Python 2.7 and later.
 	if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
 		# Pass u'' directly to use Unicode APIs on Windows 2000 and up
 		# (Detecting Windows NT 4 is tricky because 'major >= 4' would
 		# match Windows 9x series as well. Besides, NT 4 is obsolete.)
 		return s
 	else:
 		encoding = sys.getfilesystemencoding()
 		if encoding is None:
 			# Python 2 may report no file system encoding at all; encoding
 			# with None would raise a TypeError
 			encoding = 'utf-8'
 		return s.encode(encoding, 'ignore')

 class DownloadError(Exception):
 	"""Download Error exception.

 	FileDownloader objects may throw this exception when they are not
 	configured to continue on errors. It carries the appropriate error
 	message.
 	"""


 class SameFileError(Exception):
 	"""Same File exception.

 	FileDownloader objects throw this exception when they detect that
 	multiple files would have to be downloaded to the same file on disk.
 	"""


 class PostProcessingError(Exception):
 	"""Post Processing exception.

 	A PostProcessor's .run() method may raise this exception to indicate
 	an error in the postprocessing task.
 	"""

 class MaxDownloadsReached(Exception):
 	"""Signals that the --max-downloads limit has been reached."""


 class UnavailableVideoError(Exception):
 	"""Unavailable Format exception.

 	Thrown when a video is requested in a format that is not available
 	for that video.
 	"""


 class ContentTooShortError(Exception):
 	"""Content Too Short exception.

 	This exception may be raised by FileDownloader objects when a file they
 	download is too small for what the server announced first, indicating
 	the connection was probably interrupted.

 	The two byte counts are available as the downloaded and expected
 	attributes, and also as the exception's args.
 	"""
 	# Both in bytes
 	downloaded = None
 	expected = None

 	def __init__(self, downloaded, expected):
 		# Initialize the base class so that .args mirrors the constructor
 		# arguments instead of staying empty
 		Exception.__init__(self, downloaded, expected)
 		self.downloaded = downloaded
 		self.expected = expected

 	def __str__(self):
 		# Readable message for tracebacks and logs
 		return 'Content too short: %s bytes downloaded, %s bytes expected' % (self.downloaded, self.expected)


 class YoutubeDLHandler(urllib2.HTTPHandler):
 	"""Handler for HTTP requests and responses.

 	This class, when installed with an OpenerDirector, automatically adds
 	the standard headers to every HTTP request and handles gzipped and
 	deflated responses from web servers. If compression is to be avoided in
 	a particular request, the original request in the program code only has
 	to include the HTTP header "Youtubedl-No-Compression", which will be
 	removed before making the real request.

 	Part of this code was copied from:

 	http://techknack.net/python-urllib2-handlers/

 	Andrew Rowls, the author of that code, agreed to release it to the
 	public domain.
 	"""

 	@staticmethod
 	def deflate(data):
 		try:
 			return zlib.decompress(data, -zlib.MAX_WBITS)
 		except zlib.error:
 			return zlib.decompress(data)

 	@staticmethod
 	def addinfourl_wrapper(stream, headers, url, code):
 		if hasattr(urllib2.addinfourl, 'getcode'):
 			return urllib2.addinfourl(stream, headers, url, code)
 		ret = urllib2.addinfourl(stream, headers, url)
 		ret.code = code
 		return ret

 	def http_request(self, req):
 		for h in std_headers:
 			if h in req.headers:
 				del req.headers[h]
 			req.add_header(h, std_headers[h])
 		if 'Youtubedl-no-compression' in req.headers:
 			if 'Accept-encoding' in req.headers:
 				del req.headers['Accept-encoding']
 			del req.headers['Youtubedl-no-compression']
 		return req

 	def http_response(self, req, resp):
 		old_resp = resp
 		# gzip
 		if resp.headers.get('Content-encoding', '') == 'gzip':
 			gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
 			resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
 			resp.msg = old_resp.msg
 		# deflate
 		if resp.headers.get('Content-encoding', '') == 'deflate':
 			gz = StringIO.StringIO(self.deflate(resp.read()))
 			resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
 			resp.msg = old_resp.msg
 		return resp
 		
 # Use the standard json module when it is available; otherwise define a
 # minimal stand-in class that only provides json.loads().
 try:
 	import json
 except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson):
 	import re
 	class json(object):
 		@staticmethod
 		def loads(s):
 			# Parse the UTF-8 encoded JSON document s and return the
 			# corresponding Python value. Raises ValueError on malformed
 			# input. Every parse* helper below takes the index i where its
 			# value starts and returns the tuple (index after value, value).
 			s = s.decode('UTF-8')
 			def raiseError(msg, i):
 				# Raise ValueError with the position and the remaining input
 				raise ValueError(msg + ' at position ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]))
 			def skipSpace(i, expectMore=True):
 				# Return the index of the first non-whitespace character at
 				# or after i; with expectMore, reaching the end is an error
 				while i < len(s) and s[i] in ' \t\r\n':
 					i += 1
 				if expectMore:
 					if i >= len(s):
 						raiseError('Premature end', i)
 				return i
 			def decodeEscape(match):
 				# Substitution callback for parseString: group 1 is the
 				# escape sequence without its leading backslash
 				esc = match.group(1)
 				_STATIC = {
 					'"': '"',
 					'\\': '\\',
 					'/': '/',
 					'b': unichr(0x8),
 					'f': unichr(0xc),
 					'n': '\n',
 					'r': '\r',
 					't': '\t',
 				}
 				if esc in _STATIC:
 					return _STATIC[esc]
 				if esc[0] == 'u':
 					# Single \uXXXX escape
 					if len(esc) == 1+4:
 						return unichr(int(esc[1:5], 16))
 					# Surrogate pair \uXXXX\uXXXX, combined into one code point
 					if len(esc) == 5+6 and esc[5:7] == '\\u':
 						hi = int(esc[1:5], 16)
 						low = int(esc[7:11], 16)
 						return unichr((hi - 0xd800) * 0x400 + low - 0xdc00 + 0x10000)
 				raise ValueError('Unknown escape ' + str(esc))
 			def parseString(i):
 				# i is the index of the opening quote
 				i += 1
 				e = i
 				# Find the closing quote: a quote preceded by an odd number
 				# of backslashes is escaped, so the search continues after it
 				while True:
 					e = s.index('"', e)
 					bslashes = 0
 					while s[e-bslashes-1] == '\\':
 						bslashes += 1
 					if bslashes % 2 == 1:
 						e += 1
 						continue
 					break
 				rexp = re.compile(r'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}|u[0-9a-fA-F]{4}|.|$)')
 				stri = rexp.sub(decodeEscape, s[i:e])
 				return (e+1,stri)
 			def parseObj(i):
 				# i is the index of the opening curly brace
 				i += 1
 				res = {}
 				i = skipSpace(i)
 				if s[i] == '}': # Empty dictionary
 					return (i+1,res)
 				# One "key": value pair per iteration
 				while True:
 					if s[i] != '"':
 						raiseError('Expected a string object key', i)
 					i,key = parseString(i)
 					i = skipSpace(i)
 					if i >= len(s) or s[i] != ':':
 						raiseError('Expected a colon', i)
 					i,val = parse(i+1)
 					res[key] = val
 					i = skipSpace(i)
 					if s[i] == '}':
 						return (i+1, res)
 					if s[i] != ',':
 						raiseError('Expected comma or closing curly brace', i)
 					i = skipSpace(i+1)
 			def parseArray(i):
 				# i is the index of the opening bracket
 				res = []
 				i = skipSpace(i+1)
 				if s[i] == ']': # Empty array
 					return (i+1,res)
 				# One element per iteration
 				while True:
 					i,val = parse(i)
 					res.append(val)
 					i = skipSpace(i) # Raise exception if premature end
 					if s[i] == ']':
 						return (i+1, res)
 					if s[i] != ',':
 						raiseError('Expected a comma or closing bracket', i)
 					i = skipSpace(i+1)
 			def parseDiscrete(i):
 				# Parse one of the literals true, false and null
 				for k,v in {'true': True, 'false': False, 'null': None}.items():
 					if s.startswith(k, i):
 						return (i+len(k), v)
 				raiseError('Not a boolean (or null)', i)
 			def parseNumber(i):
 				# Parse a number; the result is a float when the text has a
 				# fraction or an exponent, an int otherwise
 				mobj = re.match('^(-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]+)?)', s[i:])
 				if mobj is None:
 					raiseError('Not a number', i)
 				nums = mobj.group(1)
 				if '.' in nums or 'e' in nums or 'E' in nums:
 					return (i+len(nums), float(nums))
 				return (i+len(nums), int(nums))
 			# Dispatch on the first character of a value; anything not
 			# listed here is handed to parseNumber
 			CHARMAP = {'{': parseObj, '[': parseArray, '"': parseString, 't': parseDiscrete, 'f': parseDiscrete, 'n': parseDiscrete}
 			def parse(i):
 				# Parse the value starting at or after i, skipping the
 				# whitespace around it
 				i = skipSpace(i)
 				i,res = CHARMAP.get(s[i], parseNumber)(i)
 				i = skipSpace(i, False)
 				return (i,res)
 			i,res = parse(0)
 			# Anything left after the top-level value is an error
 			if i < len(s):
 				raise ValueError('Extra data at end of input (index ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]) + ')')
 			return res
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
--- a/youtube_dl/main.py
+++ b/youtube_dl/main.py
@ -0,0 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-

 import __init__

 # When this file is executed as a script, run main() from the sibling
 # __init__ module imported above.
 if __name__ == '__main__':
 	__init__.main()