You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

253 lines
11 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. from __future__ import unicode_literals
  2. import errno
  3. import os
  4. import socket
  5. import time
  6. import re
  7. from .common import FileDownloader
  8. from ..compat import (
  9. compat_urllib_request,
  10. compat_urllib_error,
  11. )
  12. from ..utils import (
  13. ContentTooShortError,
  14. encodeFilename,
  15. sanitize_open,
  16. )
  17. class HttpFD(FileDownloader):
  18. def real_download(self, filename, info_dict):
  19. url = info_dict['url']
  20. tmpfilename = self.temp_name(filename)
  21. stream = None
  22. # Do not include the Accept-Encoding header
  23. headers = {'Youtubedl-no-compression': 'True'}
  24. add_headers = info_dict.get('http_headers')
  25. if add_headers:
  26. headers.update(add_headers)
  27. basic_request = compat_urllib_request.Request(url, None, headers)
  28. request = compat_urllib_request.Request(url, None, headers)
  29. is_test = self.params.get('test', False)
  30. if is_test:
  31. request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1))
  32. # Establish possible resume length
  33. if os.path.isfile(encodeFilename(tmpfilename)):
  34. resume_len = os.path.getsize(encodeFilename(tmpfilename))
  35. else:
  36. resume_len = 0
  37. open_mode = 'wb'
  38. if resume_len != 0:
  39. if self.params.get('continuedl', True):
  40. self.report_resuming_byte(resume_len)
  41. request.add_header('Range', 'bytes=%d-' % resume_len)
  42. open_mode = 'ab'
  43. else:
  44. resume_len = 0
  45. count = 0
  46. retries = self.params.get('retries', 0)
  47. while count <= retries:
  48. # Establish connection
  49. try:
  50. data = self.ydl.urlopen(request)
  51. # When trying to resume, Content-Range HTTP header of response has to be checked
  52. # to match the value of requested Range HTTP header. This is due to a webservers
  53. # that don't support resuming and serve a whole file with no Content-Range
  54. # set in response despite of requested Range (see
  55. # https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
  56. if resume_len > 0:
  57. content_range = data.headers.get('Content-Range')
  58. if content_range:
  59. content_range_m = re.search(r'bytes (\d+)-', content_range)
  60. # Content-Range is present and matches requested Range, resume is possible
  61. if content_range_m and resume_len == int(content_range_m.group(1)):
  62. break
  63. # Content-Range is either not present or invalid. Assuming remote webserver is
  64. # trying to send the whole file, resume is not possible, so wiping the local file
  65. # and performing entire redownload
  66. self.report_unable_to_resume()
  67. resume_len = 0
  68. open_mode = 'wb'
  69. break
  70. except (compat_urllib_error.HTTPError, ) as err:
  71. if (err.code < 500 or err.code >= 600) and err.code != 416:
  72. # Unexpected HTTP error
  73. raise
  74. elif err.code == 416:
  75. # Unable to resume (requested range not satisfiable)
  76. try:
  77. # Open the connection again without the range header
  78. data = self.ydl.urlopen(basic_request)
  79. content_length = data.info()['Content-Length']
  80. except (compat_urllib_error.HTTPError, ) as err:
  81. if err.code < 500 or err.code >= 600:
  82. raise
  83. else:
  84. # Examine the reported length
  85. if (content_length is not None and
  86. (resume_len - 100 < int(content_length) < resume_len + 100)):
  87. # The file had already been fully downloaded.
  88. # Explanation to the above condition: in issue #175 it was revealed that
  89. # YouTube sometimes adds or removes a few bytes from the end of the file,
  90. # changing the file size slightly and causing problems for some users. So
  91. # I decided to implement a suggested change and consider the file
  92. # completely downloaded if the file size differs less than 100 bytes from
  93. # the one in the hard drive.
  94. self.report_file_already_downloaded(filename)
  95. self.try_rename(tmpfilename, filename)
  96. self._hook_progress({
  97. 'filename': filename,
  98. 'status': 'finished',
  99. 'downloaded_bytes': resume_len,
  100. 'total_bytes': resume_len,
  101. })
  102. return True
  103. else:
  104. # The length does not match, we start the download over
  105. self.report_unable_to_resume()
  106. resume_len = 0
  107. open_mode = 'wb'
  108. break
  109. except socket.error as e:
  110. if e.errno != errno.ECONNRESET:
  111. # Connection reset is no problem, just retry
  112. raise
  113. # Retry
  114. count += 1
  115. if count <= retries:
  116. self.report_retry(count, retries)
  117. if count > retries:
  118. self.report_error('giving up after %s retries' % retries)
  119. return False
  120. data_len = data.info().get('Content-length', None)
  121. # Range HTTP header may be ignored/unsupported by a webserver
  122. # (e.g. extractor/scivee.py, extractor/bambuser.py).
  123. # However, for a test we still would like to download just a piece of a file.
  124. # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
  125. # block size when downloading a file.
  126. if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
  127. data_len = self._TEST_FILE_SIZE
  128. if data_len is not None:
  129. data_len = int(data_len) + resume_len
  130. min_data_len = self.params.get("min_filesize", None)
  131. max_data_len = self.params.get("max_filesize", None)
  132. if min_data_len is not None and data_len < min_data_len:
  133. self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
  134. return False
  135. if max_data_len is not None and data_len > max_data_len:
  136. self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
  137. return False
  138. byte_counter = 0 + resume_len
  139. block_size = self.params.get('buffersize', 1024)
  140. start = time.time()
  141. # measure time over whole while-loop, so slow_down() and best_block_size() work together properly
  142. now = None # needed for slow_down() in the first loop run
  143. before = start # start measuring
  144. while True:
  145. # Download and write
  146. data_block = data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
  147. byte_counter += len(data_block)
  148. # exit loop when download is finished
  149. if len(data_block) == 0:
  150. break
  151. # Open destination file just in time
  152. if stream is None:
  153. try:
  154. (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
  155. assert stream is not None
  156. filename = self.undo_temp_name(tmpfilename)
  157. self.report_destination(filename)
  158. except (OSError, IOError) as err:
  159. self.report_error('unable to open for writing: %s' % str(err))
  160. return False
  161. if self.params.get('xattr_set_filesize', False) and data_len is not None:
  162. try:
  163. import xattr
  164. xattr.setxattr(tmpfilename, 'user.ytdl.filesize', str(data_len))
  165. except(OSError, IOError, ImportError) as err:
  166. self.report_error('unable to set filesize xattr: %s' % str(err))
  167. try:
  168. stream.write(data_block)
  169. except (IOError, OSError) as err:
  170. self.to_stderr('\n')
  171. self.report_error('unable to write data: %s' % str(err))
  172. return False
  173. # Apply rate limit
  174. self.slow_down(start, now, byte_counter - resume_len)
  175. # end measuring of one loop run
  176. now = time.time()
  177. after = now
  178. # Adjust block size
  179. if not self.params.get('noresizebuffer', False):
  180. block_size = self.best_block_size(after - before, len(data_block))
  181. before = after
  182. # Progress message
  183. speed = self.calc_speed(start, now, byte_counter - resume_len)
  184. if data_len is None:
  185. eta = None
  186. else:
  187. eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
  188. self._hook_progress({
  189. 'status': 'downloading',
  190. 'downloaded_bytes': byte_counter,
  191. 'total_bytes': data_len,
  192. 'tmpfilename': tmpfilename,
  193. 'filename': filename,
  194. 'eta': eta,
  195. 'speed': speed,
  196. 'elapsed': now - start,
  197. })
  198. if is_test and byte_counter == data_len:
  199. break
  200. if stream is None:
  201. self.to_stderr('\n')
  202. self.report_error('Did not get any data blocks')
  203. return False
  204. if tmpfilename != '-':
  205. stream.close()
  206. if data_len is not None and byte_counter != data_len:
  207. raise ContentTooShortError(byte_counter, int(data_len))
  208. self.try_rename(tmpfilename, filename)
  209. # Update file modification time
  210. if self.params.get('updatetime', True):
  211. info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
  212. self._hook_progress({
  213. 'downloaded_bytes': byte_counter,
  214. 'total_bytes': byte_counter,
  215. 'filename': filename,
  216. 'status': 'finished',
  217. 'elapsed': time.time() - start,
  218. })
  219. return True