import os
import re
import subprocess
import sys
import time

from .common import FileDownloader
from ..utils import (
    compat_urllib_request,
    compat_urllib_error,
    ContentTooShortError,
    encodeFilename,
    sanitize_open,
    format_bytes,
)


class HttpFD(FileDownloader):
    def real_download(self, filename, info_dict):
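        """Download info_dict['url'] over plain HTTP into 'filename'.

        The data is written to a temporary file that is renamed once the
        transfer completes; partially downloaded files are resumed when
        possible. Returns True on success and False on a non-fatal error.
        """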
        url = info_dict['url']
        tmpfilename = self.temp_name(filename)
        stream = None

        # Do not include the Accept-Encoding header
        headers = {'Youtubedl-no-compression': 'True'}
        if 'user_agent' in info_dict:
            headers['Youtubedl-user-agent'] = info_dict['user_agent']
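        # basic_request never carries a Range header; it is reused below if
        # the server rejects a resume attempt with HTTP 416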
        basic_request = compat_urllib_request.Request(url, None, headers)
        request = compat_urllib_request.Request(url, None, headers)

        if self.params.get('test', False):
            request.add_header('Range', 'bytes=0-10240')

        # Establish possible resume length
        if os.path.isfile(encodeFilename(tmpfilename)):
            resume_len = os.path.getsize(encodeFilename(tmpfilename))
        else:
            resume_len = 0

        open_mode = 'wb'
        if resume_len != 0:
            if self.params.get('continuedl', False):
                self.report_resuming_byte(resume_len)
                request.add_header('Range', 'bytes=%d-' % resume_len)
                open_mode = 'ab'
            else:
                resume_len = 0
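
        # Connection phase: 5xx responses are retried up to 'retries' times,
        # a 416 (requested range not satisfiable) triggers the check below for
        # an already completed download, and any other HTTP error is re-raised.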
        count = 0
        retries = self.params.get('retries', 0)
        while count <= retries:
            # Establish connection
            try:
                if count == 0 and 'urlhandle' in info_dict:
                    data = info_dict['urlhandle']
                data = compat_urllib_request.urlopen(request)
                break
            except (compat_urllib_error.HTTPError, ) as err:
                if (err.code < 500 or err.code >= 600) and err.code != 416:
                    # Unexpected HTTP error
                    raise
                elif err.code == 416:
                    # Unable to resume (requested range not satisfiable)
                    try:
                        # Open the connection again without the range header
                        data = compat_urllib_request.urlopen(basic_request)
                        content_length = data.info()['Content-Length']
                    except (compat_urllib_error.HTTPError, ) as err:
                        if err.code < 500 or err.code >= 600:
                            raise
                    else:
                        # Examine the reported length
                        if (content_length is not None and
                                (resume_len - 100 < int(content_length) < resume_len + 100)):
                            # The file had already been fully downloaded.
                            # Explanation for the above condition: in issue #175 it was revealed that
                            # YouTube sometimes adds or removes a few bytes from the end of the file,
                            # changing the file size slightly and causing problems for some users. So
                            # I decided to implement a suggested change and consider the file
                            # completely downloaded if the file size differs by less than 100 bytes from
                            # the one on the hard drive.
                            self.report_file_already_downloaded(filename)
                            self.try_rename(tmpfilename, filename)
                            self._hook_progress({
                                'filename': filename,
                                'status': 'finished',
                            })
                            return True
                        else:
                            # The length does not match, so start the download over
                            self.report_unable_to_resume()
                            open_mode = 'wb'
                            break

            # Retry
            count += 1
            if count <= retries:
                self.report_retry(count, retries)

        if count > retries:
            self.report_error(u'giving up after %s retries' % retries)
            return False
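
        # When resuming, the server reports only the remaining bytes, so the
        # expected total size is the reported Content-Length plus the part
        # that is already on disk.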
        data_len = data.info().get('Content-length', None)
        if data_len is not None:
            data_len = int(data_len) + resume_len
            min_data_len = self.params.get("min_filesize", None)
            max_data_len = self.params.get("max_filesize", None)
            if min_data_len is not None and data_len < min_data_len:
                self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
                return False
            if max_data_len is not None and data_len > max_data_len:
                self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
                return False

        data_len_str = format_bytes(data_len)
        byte_counter = 0 + resume_len
        block_size = self.params.get('buffersize', 1024)
        start = time.time()
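
        # Read the stream block by block, opening the destination file lazily
        # and adapting the buffer size to the observed throughput unless the
        # 'noresizebuffer' option is set.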
        while True:
            # Download and write
            before = time.time()
            data_block = data.read(block_size)
            after = time.time()
            if len(data_block) == 0:
                break
            byte_counter += len(data_block)

            # Open file just in time
            if stream is None:
                try:
                    (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
                    assert stream is not None
                    filename = self.undo_temp_name(tmpfilename)
                    self.report_destination(filename)
                except (OSError, IOError) as err:
                    self.report_error(u'unable to open for writing: %s' % str(err))
                    return False
            try:
                stream.write(data_block)
            except (IOError, OSError) as err:
                self.to_stderr(u"\n")
                self.report_error(u'unable to write data: %s' % str(err))
                return False
            if not self.params.get('noresizebuffer', False):
                block_size = self.best_block_size(after - before, len(data_block))

            # Progress message
            speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
            if data_len is None:
                eta = percent = None
            else:
                percent = self.calc_percent(byte_counter, data_len)
                eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
            self.report_progress(percent, data_len_str, speed, eta)

            self._hook_progress({
                'downloaded_bytes': byte_counter,
                'total_bytes': data_len,
                'tmpfilename': tmpfilename,
                'filename': filename,
                'status': 'downloading',
                'eta': eta,
                'speed': speed,
            })

            # Apply rate limit
            self.slow_down(start, byte_counter - resume_len)
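
        # The read loop has ended: either the server finished sending data or
        # no data block was ever received.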
        if stream is None:
            self.to_stderr(u"\n")
            self.report_error(u'Did not get any data blocks')
            return False
        stream.close()
        self.report_finish(data_len_str, (time.time() - start))
        if data_len is not None and byte_counter != data_len:
            raise ContentTooShortError(byte_counter, int(data_len))
        self.try_rename(tmpfilename, filename)

        # Update file modification time
        if self.params.get('updatetime', True):
            info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))

        self._hook_progress({
            'downloaded_bytes': byte_counter,
            'total_bytes': byte_counter,
            'filename': filename,
            'status': 'finished',
        })

        return True