You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

214 lines
8.9 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. from __future__ import unicode_literals
  2. import os
  3. import time
  4. from .common import FileDownloader
  5. from ..utils import (
  6. compat_urllib_request,
  7. compat_urllib_error,
  8. ContentTooShortError,
  9. encodeFilename,
  10. sanitize_open,
  11. format_bytes,
  12. )
class HttpFD(FileDownloader):
    """Downloader for plain HTTP(S) URLs.

    Streams ``info_dict['url']`` into a temporary file, resuming a previous
    partial download via a ``Range`` header when possible, retrying the
    connection on transient (5xx) server errors, and reporting progress
    through the ``FileDownloader`` report_*/hook machinery.
    """

    def real_download(self, filename, info_dict):
        """Download ``info_dict['url']`` to ``filename``.

        Returns True on success (including the case where the file turns out
        to be already fully downloaded) and False on non-exceptional failure
        (retries exhausted, min/max file-size limits violated, open/write
        errors).  Raises ContentTooShortError when the transfer ends before
        the announced Content-Length is reached, and re-raises unexpected
        HTTP errors from the connection attempt.
        """
        url = info_dict['url']
        # Download into a temporary name first; renamed to `filename` on success.
        tmpfilename = self.temp_name(filename)
        stream = None  # output file object; opened lazily on the first data block

        # Do not include the Accept-Encoding header
        # NOTE(review): 'Youtubedl-no-compression' appears to be an internal
        # marker header interpreted by the project's URL opener — confirm.
        headers = {'Youtubedl-no-compression': 'True'}
        if 'user_agent' in info_dict:
            headers['Youtubedl-user-agent'] = info_dict['user_agent']
        if 'http_referer' in info_dict:
            headers['Referer'] = info_dict['http_referer']
        add_headers = info_dict.get('http_headers')
        if add_headers:
            headers.update(add_headers)
        data = info_dict.get('http_post_data')  # optional request body (POST payload)
        http_method = info_dict.get('http_method')
        # `basic_request` is kept free of the Range header so the download can
        # be reopened from scratch if the server rejects our resume range (416).
        basic_request = compat_urllib_request.Request(url, data, headers)
        request = compat_urllib_request.Request(url, data, headers)
        if http_method is not None:
            basic_request.get_method = lambda: http_method
            request.get_method = lambda: http_method

        is_test = self.params.get('test', False)

        if is_test:
            # Test mode: only request the first _TEST_FILE_SIZE bytes.
            request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1))

        # Establish possible resume length
        if os.path.isfile(encodeFilename(tmpfilename)):
            resume_len = os.path.getsize(encodeFilename(tmpfilename))
        else:
            resume_len = 0

        open_mode = 'wb'
        if resume_len != 0:
            if self.params.get('continuedl', False):
                # Resume: request only the remainder and append to the temp file.
                self.report_resuming_byte(resume_len)
                request.add_header('Range', 'bytes=%d-' % resume_len)
                open_mode = 'ab'
            else:
                # Resuming disabled: restart and overwrite the partial file.
                resume_len = 0

        count = 0
        retries = self.params.get('retries', 0)
        while count <= retries:
            # Establish connection
            try:
                data = self.ydl.urlopen(request)
                break
            except (compat_urllib_error.HTTPError, ) as err:
                # Only 5xx (transient server errors, retried below) and 416
                # (bad resume range, handled here) are tolerated.
                if (err.code < 500 or err.code >= 600) and err.code != 416:
                    # Unexpected HTTP error
                    raise
                elif err.code == 416:
                    # Unable to resume (requested range not satisfiable)
                    try:
                        # Open the connection again without the range header
                        data = self.ydl.urlopen(basic_request)
                        content_length = data.info()['Content-Length']
                    except (compat_urllib_error.HTTPError, ) as err:
                        # A non-5xx error on the rangeless retry is fatal;
                        # 5xx falls through to the retry counter below.
                        if err.code < 500 or err.code >= 600:
                            raise
                    else:
                        # Examine the reported length
                        if (content_length is not None and
                                (resume_len - 100 < int(content_length) < resume_len + 100)):
                            # The file had already been fully downloaded.
                            # Explanation to the above condition: in issue #175 it was revealed that
                            # YouTube sometimes adds or removes a few bytes from the end of the file,
                            # changing the file size slightly and causing problems for some users. So
                            # I decided to implement a suggested change and consider the file
                            # completely downloaded if the file size differs less than 100 bytes from
                            # the one in the hard drive.
                            self.report_file_already_downloaded(filename)
                            self.try_rename(tmpfilename, filename)
                            self._hook_progress({
                                'filename': filename,
                                'status': 'finished',
                            })
                            return True
                        else:
                            # The length does not match, we start the download over
                            self.report_unable_to_resume()
                            resume_len = 0
                            open_mode = 'wb'
                            break
            # Retry
            count += 1
            if count <= retries:
                self.report_retry(count, retries)

        if count > retries:
            self.report_error('giving up after %s retries' % retries)
            return False

        data_len = data.info().get('Content-length', None)
        # Range HTTP header may be ignored/unsupported by a webserver
        # (e.g. extractor/scivee.py, extractor/bambuser.py).
        # However, for a test we still would like to download just a piece of a file.
        # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
        # block size when downloading a file.
        if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
            data_len = self._TEST_FILE_SIZE
        if data_len is not None:
            # When resuming, Content-Length covers only the remaining bytes;
            # add what is already on disk to get the full expected size.
            data_len = int(data_len) + resume_len
            min_data_len = self.params.get("min_filesize", None)
            max_data_len = self.params.get("max_filesize", None)
            if min_data_len is not None and data_len < min_data_len:
                self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
                return False
            if max_data_len is not None and data_len > max_data_len:
                self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
                return False

        data_len_str = format_bytes(data_len)
        byte_counter = 0 + resume_len
        block_size = self.params.get('buffersize', 1024)
        start = time.time()
        while True:
            # Download and write
            before = time.time()
            # In test mode cap the read so byte_counter never exceeds data_len.
            data_block = data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
            after = time.time()
            if len(data_block) == 0:
                break
            byte_counter += len(data_block)

            # Open file just in time
            if stream is None:
                try:
                    (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
                    assert stream is not None
                    filename = self.undo_temp_name(tmpfilename)
                    self.report_destination(filename)
                except (OSError, IOError) as err:
                    self.report_error('unable to open for writing: %s' % str(err))
                    return False
            try:
                stream.write(data_block)
            except (IOError, OSError) as err:
                self.to_stderr('\n')
                self.report_error('unable to write data: %s' % str(err))
                return False
            if not self.params.get('noresizebuffer', False):
                # Adapt the block size to the measured throughput of this read.
                block_size = self.best_block_size(after - before, len(data_block))

            # Progress message
            speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
            if data_len is None:
                # Unknown total size: no percentage or ETA can be computed.
                eta = percent = None
            else:
                percent = self.calc_percent(byte_counter, data_len)
                eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
            self.report_progress(percent, data_len_str, speed, eta)

            self._hook_progress({
                'downloaded_bytes': byte_counter,
                'total_bytes': data_len,
                'tmpfilename': tmpfilename,
                'filename': filename,
                'status': 'downloading',
                'eta': eta,
                'speed': speed,
            })

            if is_test and byte_counter == data_len:
                break

            # Apply rate limit
            self.slow_down(start, byte_counter - resume_len)

        if stream is None:
            # No data block was ever received, so the file was never opened.
            self.to_stderr('\n')
            self.report_error('Did not get any data blocks')
            return False
        if tmpfilename != '-':
            # NOTE(review): '-' presumably denotes stdout, which must stay
            # open — confirm against sanitize_open's handling of '-'.
            stream.close()
        self.report_finish(data_len_str, (time.time() - start))
        if data_len is not None and byte_counter != data_len:
            raise ContentTooShortError(byte_counter, int(data_len))
        self.try_rename(tmpfilename, filename)

        # Update file modification time
        if self.params.get('updatetime', True):
            info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))

        self._hook_progress({
            'downloaded_bytes': byte_counter,
            # Download finished, so the byte count is the authoritative total.
            'total_bytes': byte_counter,
            'filename': filename,
            'status': 'finished',
        })

        return True