You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

243 lines
9.6 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. from __future__ import unicode_literals
  2. import errno
  3. import os
  4. import socket
  5. import time
  6. from .common import FileDownloader
  7. from ..compat import (
  8. compat_urllib_request,
  9. compat_urllib_error,
  10. )
  11. from ..utils import (
  12. ContentTooShortError,
  13. encodeFilename,
  14. sanitize_open,
  15. )
class HttpFD(FileDownloader):
    """Plain HTTP(S) file downloader.

    Fetches ``info_dict['url']`` with support for byte-range resume,
    a bounded retry loop, rate limiting and progress hooks.
    """

    def real_download(self, filename, info_dict):
        """Download ``info_dict['url']`` to ``filename``.

        Returns True on success and False on a reported (non-raised)
        failure.  Raises ContentTooShortError when the downloaded byte
        count disagrees with the server-reported Content-Length, and
        re-raises unexpected HTTP or socket errors.
        """
        url = info_dict['url']
        # Data is written to a temporary name first, then renamed into place.
        tmpfilename = self.temp_name(filename)
        stream = None  # destination file object; opened lazily on the first data block

        # Do not include the Accept-Encoding header
        headers = {'Youtubedl-no-compression': 'True'}
        add_headers = info_dict.get('http_headers')
        if add_headers:
            headers.update(add_headers)
        data = info_dict.get('http_post_data')
        http_method = info_dict.get('http_method')
        # Two requests are prepared: `request` may later carry a Range header
        # for resuming; `basic_request` is the range-free fallback used when
        # the server rejects the range (HTTP 416).
        basic_request = compat_urllib_request.Request(url, data, headers)
        request = compat_urllib_request.Request(url, data, headers)
        if http_method is not None:
            basic_request.get_method = lambda: http_method
            request.get_method = lambda: http_method

        is_test = self.params.get('test', False)

        if is_test:
            # In test mode only fetch the first _TEST_FILE_SIZE bytes.
            request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1))

        # Establish possible resume length
        if os.path.isfile(encodeFilename(tmpfilename)):
            resume_len = os.path.getsize(encodeFilename(tmpfilename))
        else:
            resume_len = 0

        open_mode = 'wb'
        if resume_len != 0:
            if self.params.get('continuedl', False):
                self.report_resuming_byte(resume_len)
                request.add_header('Range', 'bytes=%d-' % resume_len)
                open_mode = 'ab'  # append to the existing partial file
            else:
                # Resuming disabled: ignore the partial file and start over.
                resume_len = 0

        count = 0
        retries = self.params.get('retries', 0)
        while count <= retries:
            # Establish connection
            try:
                data = self.ydl.urlopen(request)
                break
            except (compat_urllib_error.HTTPError, ) as err:
                if (err.code < 500 or err.code >= 600) and err.code != 416:
                    # Unexpected HTTP error
                    raise
                elif err.code == 416:
                    # Unable to resume (requested range not satisfiable)
                    try:
                        # Open the connection again without the range header
                        data = self.ydl.urlopen(basic_request)
                        content_length = data.info()['Content-Length']
                    # NOTE(review): this inner `as err` shadows the outer
                    # handler's `err`; harmless here since the outer handler
                    # does not use `err` afterwards.
                    except (compat_urllib_error.HTTPError, ) as err:
                        if err.code < 500 or err.code >= 600:
                            raise
                    else:
                        # Examine the reported length
                        if (content_length is not None and
                                (resume_len - 100 < int(content_length) < resume_len + 100)):
                            # The file had already been fully downloaded.
                            # Explanation to the above condition: in issue #175 it was revealed that
                            # YouTube sometimes adds or removes a few bytes from the end of the file,
                            # changing the file size slightly and causing problems for some users. So
                            # I decided to implement a suggested change and consider the file
                            # completely downloaded if the file size differs less than 100 bytes from
                            # the one in the hard drive.
                            self.report_file_already_downloaded(filename)
                            self.try_rename(tmpfilename, filename)
                            self._hook_progress({
                                'filename': filename,
                                'status': 'finished',
                            })
                            return True
                        else:
                            # The length does not match, we start the download over
                            self.report_unable_to_resume()
                            resume_len = 0
                            open_mode = 'wb'
                            break
            except socket.error as e:
                if e.errno != errno.ECONNRESET:
                    # Connection reset is no problem, just retry
                    raise

            # Retry
            count += 1
            if count <= retries:
                self.report_retry(count, retries)

        if count > retries:
            self.report_error('giving up after %s retries' % retries)
            return False

        data_len = data.info().get('Content-length', None)

        # Range HTTP header may be ignored/unsupported by a webserver
        # (e.g. extractor/scivee.py, extractor/bambuser.py).
        # However, for a test we still would like to download just a piece of a file.
        # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
        # block size when downloading a file.
        if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
            data_len = self._TEST_FILE_SIZE

        if data_len is not None:
            # Total expected size includes the bytes already on disk.
            data_len = int(data_len) + resume_len
            min_data_len = self.params.get("min_filesize", None)
            max_data_len = self.params.get("max_filesize", None)
            if min_data_len is not None and data_len < min_data_len:
                self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
                return False
            if max_data_len is not None and data_len > max_data_len:
                self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
                return False

        byte_counter = 0 + resume_len
        block_size = self.params.get('buffersize', 1024)
        start = time.time()

        # measure time over whole while-loop, so slow_down() and best_block_size() work together properly
        now = None  # needed for slow_down() in the first loop run
        before = start  # start measuring
        while True:

            # Download and write
            data_block = data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
            byte_counter += len(data_block)

            # exit loop when download is finished
            if len(data_block) == 0:
                break

            # Open destination file just in time
            if stream is None:
                try:
                    (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
                    assert stream is not None
                    filename = self.undo_temp_name(tmpfilename)
                    self.report_destination(filename)
                except (OSError, IOError) as err:
                    self.report_error('unable to open for writing: %s' % str(err))
                    return False

                # Optionally record the expected size as an extended attribute
                # of the file; best-effort — failure only reports an error.
                if self.params.get('xattr_set_filesize', False) and data_len is not None:
                    try:
                        import xattr
                        xattr.setxattr(tmpfilename, 'user.ytdl.filesize', str(data_len))
                    except(OSError, IOError, ImportError) as err:
                        self.report_error('unable to set filesize xattr: %s' % str(err))

            try:
                stream.write(data_block)
            except (IOError, OSError) as err:
                self.to_stderr('\n')
                self.report_error('unable to write data: %s' % str(err))
                return False

            # Apply rate limit
            self.slow_down(start, now, byte_counter - resume_len)

            # end measuring of one loop run
            now = time.time()
            after = now

            # Adjust block size
            if not self.params.get('noresizebuffer', False):
                block_size = self.best_block_size(after - before, len(data_block))

            before = after

            # Progress message
            speed = self.calc_speed(start, now, byte_counter - resume_len)
            if data_len is None:
                eta = None
            else:
                eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)

            self._hook_progress({
                'status': 'downloading',
                'downloaded_bytes': byte_counter,
                'total_bytes': data_len,
                'tmpfilename': tmpfilename,
                'filename': filename,
                'eta': eta,
                'speed': speed,
                'elapsed': now - start,
            })

            # The server may have ignored the Range header; in test mode stop
            # as soon as the capped length has been reached.
            if is_test and byte_counter == data_len:
                break

        if stream is None:
            self.to_stderr('\n')
            self.report_error('Did not get any data blocks')
            return False
        if tmpfilename != '-':
            stream.close()

        # NOTE(review): this hook reports status 'error' unconditionally, even
        # on a successful download (the 'finished' hook below follows it) —
        # confirm this ordering is what the hook consumers expect.
        self._hook_progress({
            'downloaded_bytes': byte_counter,
            'total_bytes': data_len,
            'tmpfilename': tmpfilename,
            'status': 'error',
        })
        if data_len is not None and byte_counter != data_len:
            raise ContentTooShortError(byte_counter, int(data_len))
        self.try_rename(tmpfilename, filename)

        # Update file modification time
        if self.params.get('updatetime', True):
            info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))

        self._hook_progress({
            'downloaded_bytes': byte_counter,
            'total_bytes': byte_counter,
            'filename': filename,
            'status': 'finished',
            'elapsed': time.time() - start,
        })

        return True
  209. return True