Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
#!/usr/bin/env python # -*- coding: utf-8 -*-
import ctypes
"""File Downloader class.
File downloader objects are the ones responsible for downloading the actual video file and writing it to disk if the user has requested it, among some other tasks. In most cases there should be one per program. Since, given a video URL, the downloader doesn't know how to extract all the needed information (a task that InfoExtractors perform), it has to pass the URL to one of them.
For this, file downloader objects have a method that allows InfoExtractors to be registered in a given order. When it is passed a URL, the file downloader hands it to the first InfoExtractor it finds that reports being able to handle it. The InfoExtractor extracts all the information about the video or videos the URL refers to, and asks the FileDownloader to process the video information, possibly downloading the video.
File downloaders accept a lot of parameters. In order not to saturate the object constructor with arguments, it receives a dictionary of options instead. These options are available through the params attribute for the InfoExtractors to use. The FileDownloader also registers itself as the downloader in charge of the InfoExtractors that are added to it, so this is a "mutual registration".
Available options:
username: Username for authentication purposes. password: Password for authentication purposes. usenetrc: Use netrc for authentication instead. quiet: Do not print messages to stdout. forceurl: Force printing final URL. forcetitle: Force printing title. forcethumbnail: Force printing thumbnail URL. forcedescription: Force printing description. forcefilename: Force printing final filename. simulate: Do not download the video files. format: Video format code. format_limit: Highest quality format to try. outtmpl: Template for output names. restrictfilenames: Do not allow "&" and spaces in file names. ignoreerrors: Do not stop on download errors. ratelimit: Download speed limit, in bytes/sec. nooverwrites: Prevent overwriting files. retries: Number of times to retry for HTTP error 5xx. buffersize: Size of download buffer in bytes. noresizebuffer: Do not automatically resize the download buffer. continuedl: Try to continue downloads if possible. noprogress: Do not print the progress bar. playliststart: Playlist item to start at. playlistend: Playlist item to end at. matchtitle: Download only matching titles. rejecttitle: Reject downloads for matching titles. logtostderr: Log messages to stderr instead of stdout. consoletitle: Display progress in console window's titlebar. nopart: Do not use temporary .part files. updatetime: Use the Last-modified header to set output file timestamps. writedescription: Write the video description to a .description file. writeinfojson: Write the video metadata to a .info.json file. writesubtitles: Write the video subtitles to a .srt file. subtitleslang: Language of the subtitles to download. test: Download only first bytes to test the downloader. """
"""Create a FileDownloader object with the given options."""
self.to_stderr(u'WARNING: %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
def format_bytes(bytes): return 'N/A' bytes = float(bytes) exponent = 0 else:
def calc_percent(byte_counter, data_len): return '---.-%'
def calc_eta(start, now, total, current): return '--:--' return '--:--'
def calc_speed(start, now, bytes):
def best_block_size(elapsed_time, bytes):
def parse_bytes(bytestr):
    """Parse a string indicating a byte quantity into an integer.

    The string must be a non-negative decimal number (an optional
    fractional part is allowed) optionally followed by a single suffix
    letter out of k, M, G, T, P, E, Z, Y (case-insensitive). Each
    successive suffix multiplies the number by a further factor of 1024.
    Leading and trailing whitespace is ignored.

    Returns the quantity rounded to an integer, or None when ``bytestr``
    is None or does not have the expected format.
    """
    if bytestr is None:
        return None
    matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr.strip())
    if matchobj is None:
        return None
    number = float(matchobj.group(1))
    suffix = matchobj.group(2).lower()
    # No suffix means plain bytes (exponent 0). The leading 'b' in the
    # lookup string keeps 'k' at index 1, so a suffix's index is directly
    # its power of 1024.
    exponent = 'bkmgtpezy'.index(suffix) if suffix else 0
    multiplier = 1024.0 ** exponent
    return int(round(number * multiplier))
"""Add an InfoExtractor object to the end of the list."""
"""Add a PostProcessor object to the end of the chain.""" self._pps.append(pp) pp.set_downloader(self)
"""Print message to stdout if not in quiet mode."""
"""Print message to stderr.""" assert type(message) == type(u'') output = message + u'\n' if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr output = output.encode(preferredencoding()) sys.stderr.write(output)
"""Set console/terminal window title to message.""" if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow(): # c_wchar_p() might not be necessary if `message` is # already of type unicode() ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) elif 'TERM' in os.environ: sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
"""Checks if the output template is fixed.""" return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
"""Determine action to take when a download problem appears.
Depending on if the downloader has been configured to ignore download errors or not, this method may throw an exception or not when errors are found, after printing the message. """ if message is not None: self.to_stderr(message) if self.params.get('verbose'): self.to_stderr(u''.join(traceback.format_list(traceback.extract_stack()))) if not self.params.get('ignoreerrors', False): raise DownloadError(message) self._download_retcode = 1
"""Sleep if the download speed is over the rate limit.""" now = time.time() elapsed = now - start_time if elapsed <= 0.0: return speed = float(byte_counter) / elapsed if speed > rate_limit: time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
"""Returns a temporary filename for the given filename.""" (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))): return filename
return filename
return except (IOError, OSError) as err: self.trouble(u'ERROR: unable to rename file')
"""Try to set the last-modified time of the given file.""" return return return return filetime except: pass
""" Report that the description file is being written """ self.to_screen(u'[info] Writing video description to: ' + descfn)
""" Report that the subtitles file is being written """
""" Report that the metadata file has been written """ self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
"""Report destination filename."""
"""Report download progress.""" return (percent_str, data_len_str, speed_str, eta_str), skip_eol=True) (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
"""Report attempt to resume at given byte.""" self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
"""Report retry in case of HTTP error 5xx""" self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
"""Report file has already been fully downloaded.""" except (UnicodeEncodeError) as err: self.to_screen(u'[download] The file has already been downloaded')
"""Report it was impossible to resume download.""" self.to_screen(u'[download] Unable to resume')
"""Report download finished.""" self.to_screen(u'[download] Download completed') else:
"""Increment the ordinal that assigns a number to each file."""
"""Generate the output filename."""
u'NA' if v is None else compat_str(v), restricted=self.params.get('restrictfilenames'), is_id=(k==u'id'))
except (ValueError, KeyError) as err: self.trouble(u'ERROR: invalid system charset or erroneous output template') return None
""" Returns None iff the file should be downloaded """
matchtitle = matchtitle.decode('utf8') if not re.search(matchtitle, title, re.IGNORECASE): return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' rejecttitle = rejecttitle.decode('utf8') if re.search(rejecttitle, title, re.IGNORECASE): return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
"""Process a single dictionary returned by an InfoExtractor."""
# Keep for backwards compatibility
self.to_screen(u'[download] ' + reason) return
if self._num_downloads > int(max_downloads): raise MaxDownloadsReached()
# Forced printings compat_print(info_dict['title']) compat_print(info_dict['url']) compat_print(info_dict['thumbnail']) compat_print(info_dict['description']) compat_print(filename) compat_print(info_dict['format'])
# Do nothing else if in simulate mode return
return
os.makedirs(dn) except (OSError, IOError) as err: self.trouble(u'ERROR: unable to create directory ' + compat_str(err)) return
try: descfn = filename + u'.description' self.report_writedescription(descfn) descfile = open(encodeFilename(descfn), 'wb') try: descfile.write(info_dict['description'].encode('utf-8')) finally: descfile.close() except (OSError, IOError): self.trouble(u'ERROR: Cannot write description file ' + descfn) return
# subtitles download errors are already managed as troubles in relevant IE # that way it will silently go on when used with unsupporting IE finally: except (OSError, IOError): self.trouble(u'ERROR: Cannot write subtitles file ' + descfn) return
infofn = filename + u'.info.json' self.report_writeinfojson(infofn) try: json.dump except (NameError,AttributeError): self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.') return try: infof = open(encodeFilename(infofn), 'w') try: json_info_dict = dict((k, info_dict[k]) for k in info_dict if not k in ['urlhandle']) json.dump(json_info_dict, infof) finally: infof.close() except (OSError, IOError): self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn) return
success = True else: except (OSError, IOError) as err: raise UnavailableVideoError() except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self.trouble(u'ERROR: unable to download video data: %s' % str(err)) return except (ContentTooShortError, ) as err: self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) return
except (PostProcessingError) as err: self.trouble(u'ERROR: postprocessing: %s' % str(err)) return
"""Download a given list of URLs.""" raise SameFileError(self.params['outtmpl'])
# Go to next InfoExtractor if not suitable
# Warn if the _WORKING attribute is False self.trouble(u'WARNING: the program functionality for this site has been marked as broken, ' u'and will probably not work. If you want to go on, use the -i option.')
# Suitable InfoExtractor found
# Extract information from URL and process it except UnavailableVideoError: self.trouble(u'\nERROR: unable to download video')
# Suitable InfoExtractor had been found; go to next URL
self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
"""Run the postprocessing chain on the given file.""" info = pp.run(info) if info is None: break
self.report_destination(filename) tmpfilename = self.temp_name(filename)
# Check for rtmpdump first try: subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT) except (OSError, IOError): self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run') return False
# Download using rtmpdump. rtmpdump returns exit code 2 when # the connection was interrumpted and resuming appears to be # possible. This is part of rtmpdump's normal usage, AFAIK. basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename] args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)] if self.params.get('verbose', False): try: import pipes shell_quote = lambda args: ' '.join(map(pipes.quote, args)) except ImportError: shell_quote = repr self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args)) retval = subprocess.call(args) while retval == 2 or retval == 1: prevsize = os.path.getsize(encodeFilename(tmpfilename)) self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True) time.sleep(5.0) # This seems to be needed retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1]) cursize = os.path.getsize(encodeFilename(tmpfilename)) if prevsize == cursize and retval == 1: break # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those if prevsize == cursize and retval == 2 and cursize > 1024: self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.') retval = 0 break if retval == 0: self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(encodeFilename(tmpfilename))) self.try_rename(tmpfilename, filename) return True else: self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval) return False
# Check file already present
# Attempt to download using rtmpdump return self._download_with_rtmpdump(filename, url, player_url)
# Do not include the Accept-Encoding header
# Establish possible resume length resume_len = os.path.getsize(encodeFilename(tmpfilename)) else:
if self.params.get('continuedl', False): self.report_resuming_byte(resume_len) request.add_header('Range','bytes=%d-' % resume_len) open_mode = 'ab' else: resume_len = 0
# Establish connection data = info_dict['urlhandle'] except (compat_urllib_error.HTTPError, ) as err: if (err.code < 500 or err.code >= 600) and err.code != 416: # Unexpected HTTP error raise elif err.code == 416: # Unable to resume (requested range not satisfiable) try: # Open the connection again without the range header data = compat_urllib_request.urlopen(basic_request) content_length = data.info()['Content-Length'] except (compat_urllib_error.HTTPError, ) as err: if err.code < 500 or err.code >= 600: raise else: # Examine the reported length if (content_length is not None and (resume_len - 100 < int(content_length) < resume_len + 100)): # The file had already been fully downloaded. # Explanation to the above condition: in issue #175 it was revealed that # YouTube sometimes adds or removes a few bytes from the end of the file, # changing the file size slightly and causing problems for some users. So # I decided to implement a suggested change and consider the file # completely downloaded if the file size differs less than 100 bytes from # the one in the hard drive. self.report_file_already_downloaded(filename) self.try_rename(tmpfilename, filename) return True else: # The length does not match, we start the download over self.report_unable_to_resume() open_mode = 'wb' break # Retry count += 1 if count <= retries: self.report_retry(count, retries)
self.trouble(u'ERROR: giving up after %s retries' % retries) return False
# Download and write
# Open file just in time except (OSError, IOError) as err: self.trouble(u'ERROR: unable to open for writing: %s' % str(err)) return False except (IOError, OSError) as err: self.trouble(u'\nERROR: unable to write data: %s' % str(err)) return False
# Progress message self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA') else:
# Apply rate limit
self.trouble(u'\nERROR: Did not get any data blocks') return False raise ContentTooShortError(byte_counter, int(data_len))
# Update file modification time
|