You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

444 lines
15 KiB

10 years ago
11 years ago
11 years ago
11 years ago
10 years ago
10 years ago
  1. from __future__ import division, unicode_literals
  2. import base64
  3. import io
  4. import itertools
  5. import os
  6. import time
  7. import xml.etree.ElementTree as etree
  8. from .common import FileDownloader
  9. from .http import HttpFD
  10. from ..compat import (
  11. compat_urlparse,
  12. compat_urllib_error,
  13. )
  14. from ..utils import (
  15. struct_pack,
  16. struct_unpack,
  17. encodeFilename,
  18. sanitize_open,
  19. xpath_text,
  20. )
  21. class FlvReader(io.BytesIO):
  22. """
  23. Reader for Flv files
  24. The file format is documented in https://www.adobe.com/devnet/f4v.html
  25. """
  26. # Utility functions for reading numbers and strings
  27. def read_unsigned_long_long(self):
  28. return struct_unpack('!Q', self.read(8))[0]
  29. def read_unsigned_int(self):
  30. return struct_unpack('!I', self.read(4))[0]
  31. def read_unsigned_char(self):
  32. return struct_unpack('!B', self.read(1))[0]
  33. def read_string(self):
  34. res = b''
  35. while True:
  36. char = self.read(1)
  37. if char == b'\x00':
  38. break
  39. res += char
  40. return res
  41. def read_box_info(self):
  42. """
  43. Read a box and return the info as a tuple: (box_size, box_type, box_data)
  44. """
  45. real_size = size = self.read_unsigned_int()
  46. box_type = self.read(4)
  47. header_end = 8
  48. if size == 1:
  49. real_size = self.read_unsigned_long_long()
  50. header_end = 16
  51. return real_size, box_type, self.read(real_size - header_end)
  52. def read_asrt(self):
  53. # version
  54. self.read_unsigned_char()
  55. # flags
  56. self.read(3)
  57. quality_entry_count = self.read_unsigned_char()
  58. # QualityEntryCount
  59. for i in range(quality_entry_count):
  60. self.read_string()
  61. segment_run_count = self.read_unsigned_int()
  62. segments = []
  63. for i in range(segment_run_count):
  64. first_segment = self.read_unsigned_int()
  65. fragments_per_segment = self.read_unsigned_int()
  66. segments.append((first_segment, fragments_per_segment))
  67. return {
  68. 'segment_run': segments,
  69. }
  70. def read_afrt(self):
  71. # version
  72. self.read_unsigned_char()
  73. # flags
  74. self.read(3)
  75. # time scale
  76. self.read_unsigned_int()
  77. quality_entry_count = self.read_unsigned_char()
  78. # QualitySegmentUrlModifiers
  79. for i in range(quality_entry_count):
  80. self.read_string()
  81. fragments_count = self.read_unsigned_int()
  82. fragments = []
  83. for i in range(fragments_count):
  84. first = self.read_unsigned_int()
  85. first_ts = self.read_unsigned_long_long()
  86. duration = self.read_unsigned_int()
  87. if duration == 0:
  88. discontinuity_indicator = self.read_unsigned_char()
  89. else:
  90. discontinuity_indicator = None
  91. fragments.append({
  92. 'first': first,
  93. 'ts': first_ts,
  94. 'duration': duration,
  95. 'discontinuity_indicator': discontinuity_indicator,
  96. })
  97. return {
  98. 'fragments': fragments,
  99. }
  100. def read_abst(self):
  101. # version
  102. self.read_unsigned_char()
  103. # flags
  104. self.read(3)
  105. self.read_unsigned_int() # BootstrapinfoVersion
  106. # Profile,Live,Update,Reserved
  107. flags = self.read_unsigned_char()
  108. live = flags & 0x20 != 0
  109. # time scale
  110. self.read_unsigned_int()
  111. # CurrentMediaTime
  112. self.read_unsigned_long_long()
  113. # SmpteTimeCodeOffset
  114. self.read_unsigned_long_long()
  115. self.read_string() # MovieIdentifier
  116. server_count = self.read_unsigned_char()
  117. # ServerEntryTable
  118. for i in range(server_count):
  119. self.read_string()
  120. quality_count = self.read_unsigned_char()
  121. # QualityEntryTable
  122. for i in range(quality_count):
  123. self.read_string()
  124. # DrmData
  125. self.read_string()
  126. # MetaData
  127. self.read_string()
  128. segments_count = self.read_unsigned_char()
  129. segments = []
  130. for i in range(segments_count):
  131. box_size, box_type, box_data = self.read_box_info()
  132. assert box_type == b'asrt'
  133. segment = FlvReader(box_data).read_asrt()
  134. segments.append(segment)
  135. fragments_run_count = self.read_unsigned_char()
  136. fragments = []
  137. for i in range(fragments_run_count):
  138. box_size, box_type, box_data = self.read_box_info()
  139. assert box_type == b'afrt'
  140. fragments.append(FlvReader(box_data).read_afrt())
  141. return {
  142. 'segments': segments,
  143. 'fragments': fragments,
  144. 'live': live,
  145. }
  146. def read_bootstrap_info(self):
  147. total_size, box_type, box_data = self.read_box_info()
  148. assert box_type == b'abst'
  149. return FlvReader(box_data).read_abst()
  150. def read_bootstrap_info(bootstrap_bytes):
  151. return FlvReader(bootstrap_bytes).read_bootstrap_info()
  152. def build_fragments_list(boot_info):
  153. """ Return a list of (segment, fragment) for each fragment in the video """
  154. res = []
  155. segment_run_table = boot_info['segments'][0]
  156. fragment_run_entry_table = boot_info['fragments'][0]['fragments']
  157. first_frag_number = fragment_run_entry_table[0]['first']
  158. fragments_counter = itertools.count(first_frag_number)
  159. for segment, fragments_count in segment_run_table['segment_run']:
  160. for _ in range(fragments_count):
  161. res.append((segment, next(fragments_counter)))
  162. if boot_info['live']:
  163. res = res[-2:]
  164. return res
  165. def write_unsigned_int(stream, val):
  166. stream.write(struct_pack('!I', val))
  167. def write_unsigned_int_24(stream, val):
  168. stream.write(struct_pack('!I', val)[1:])
  169. def write_flv_header(stream):
  170. """Writes the FLV header to stream"""
  171. # FLV header
  172. stream.write(b'FLV\x01')
  173. stream.write(b'\x05')
  174. stream.write(b'\x00\x00\x00\x09')
  175. stream.write(b'\x00\x00\x00\x00')
  176. def write_metadata_tag(stream, metadata):
  177. """Writes optional metadata tag to stream"""
  178. SCRIPT_TAG = b'\x12'
  179. FLV_TAG_HEADER_LEN = 11
  180. if metadata:
  181. stream.write(SCRIPT_TAG)
  182. write_unsigned_int_24(stream, len(metadata))
  183. stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
  184. stream.write(metadata)
  185. write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata))
  186. def _add_ns(prop):
  187. return '{http://ns.adobe.com/f4m/1.0}%s' % prop
  188. class HttpQuietDownloader(HttpFD):
  189. def to_screen(self, *args, **kargs):
  190. pass
  191. class F4mFD(FileDownloader):
  192. """
  193. A downloader for f4m manifests or AdobeHDS.
  194. """
  195. def _get_unencrypted_media(self, doc):
  196. media = doc.findall(_add_ns('media'))
  197. if not media:
  198. self.report_error('No media found')
  199. for e in (doc.findall(_add_ns('drmAdditionalHeader')) +
  200. doc.findall(_add_ns('drmAdditionalHeaderSet'))):
  201. # If id attribute is missing it's valid for all media nodes
  202. # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
  203. if 'id' not in e.attrib:
  204. self.report_error('Missing ID in f4m DRM')
  205. media = list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and
  206. 'drmAdditionalHeaderSetId' not in e.attrib,
  207. media))
  208. if not media:
  209. self.report_error('Unsupported DRM')
  210. return media
  211. def _get_bootstrap_from_url(self, bootstrap_url):
  212. bootstrap = self.ydl.urlopen(bootstrap_url).read()
  213. return read_bootstrap_info(bootstrap)
  214. def _update_live_fragments(self, bootstrap_url, latest_fragment):
  215. fragments_list = []
  216. retries = 30
  217. while (not fragments_list) and (retries > 0):
  218. boot_info = self._get_bootstrap_from_url(bootstrap_url)
  219. fragments_list = build_fragments_list(boot_info)
  220. fragments_list = [f for f in fragments_list if f[1] > latest_fragment]
  221. if not fragments_list:
  222. # Retry after a while
  223. time.sleep(5.0)
  224. retries -= 1
  225. if not fragments_list:
  226. self.report_error('Failed to update fragments')
  227. return fragments_list
  228. def _parse_bootstrap_node(self, node, base_url):
  229. if node.text is None:
  230. bootstrap_url = compat_urlparse.urljoin(
  231. base_url, node.attrib['url'])
  232. boot_info = self._get_bootstrap_from_url(bootstrap_url)
  233. else:
  234. bootstrap_url = None
  235. bootstrap = base64.b64decode(node.text.encode('ascii'))
  236. boot_info = read_bootstrap_info(bootstrap)
  237. return (boot_info, bootstrap_url)
  238. def real_download(self, filename, info_dict):
  239. man_url = info_dict['url']
  240. requested_bitrate = info_dict.get('tbr')
  241. self.to_screen('[download] Downloading f4m manifest')
  242. manifest = self.ydl.urlopen(man_url).read()
  243. doc = etree.fromstring(manifest)
  244. formats = [(int(f.attrib.get('bitrate', -1)), f)
  245. for f in self._get_unencrypted_media(doc)]
  246. if requested_bitrate is None:
  247. # get the best format
  248. formats = sorted(formats, key=lambda f: f[0])
  249. rate, media = formats[-1]
  250. else:
  251. rate, media = list(filter(
  252. lambda f: int(f[0]) == requested_bitrate, formats))[0]
  253. base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
  254. bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
  255. boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, base_url)
  256. live = boot_info['live']
  257. metadata_node = media.find(_add_ns('metadata'))
  258. if metadata_node is not None:
  259. metadata = base64.b64decode(metadata_node.text.encode('ascii'))
  260. else:
  261. metadata = None
  262. fragments_list = build_fragments_list(boot_info)
  263. if self.params.get('test', False):
  264. # We only download the first fragment
  265. fragments_list = fragments_list[:1]
  266. total_frags = len(fragments_list)
  267. # For some akamai manifests we'll need to add a query to the fragment url
  268. akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
  269. self.report_destination(filename)
  270. http_dl = HttpQuietDownloader(
  271. self.ydl,
  272. {
  273. 'continuedl': True,
  274. 'quiet': True,
  275. 'noprogress': True,
  276. 'ratelimit': self.params.get('ratelimit', None),
  277. 'test': self.params.get('test', False),
  278. }
  279. )
  280. tmpfilename = self.temp_name(filename)
  281. (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
  282. write_flv_header(dest_stream)
  283. if not live:
  284. write_metadata_tag(dest_stream, metadata)
  285. # This dict stores the download progress, it's updated by the progress
  286. # hook
  287. state = {
  288. 'status': 'downloading',
  289. 'downloaded_bytes': 0,
  290. 'frag_index': 0,
  291. 'frag_count': total_frags,
  292. 'filename': filename,
  293. 'tmpfilename': tmpfilename,
  294. }
  295. start = time.time()
  296. def frag_progress_hook(s):
  297. if s['status'] not in ('downloading', 'finished'):
  298. return
  299. frag_total_bytes = s.get('total_bytes', 0)
  300. if s['status'] == 'finished':
  301. state['downloaded_bytes'] += frag_total_bytes
  302. state['frag_index'] += 1
  303. estimated_size = (
  304. (state['downloaded_bytes'] + frag_total_bytes) /
  305. (state['frag_index'] + 1) * total_frags)
  306. time_now = time.time()
  307. state['total_bytes_estimate'] = estimated_size
  308. state['elapsed'] = time_now - start
  309. if s['status'] == 'finished':
  310. progress = self.calc_percent(state['frag_index'], total_frags)
  311. else:
  312. frag_downloaded_bytes = s['downloaded_bytes']
  313. frag_progress = self.calc_percent(frag_downloaded_bytes,
  314. frag_total_bytes)
  315. progress = self.calc_percent(state['frag_index'], total_frags)
  316. progress += frag_progress / float(total_frags)
  317. state['eta'] = self.calc_eta(
  318. start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes)
  319. state['speed'] = s.get('speed')
  320. self._hook_progress(state)
  321. http_dl.add_progress_hook(frag_progress_hook)
  322. frags_filenames = []
  323. while fragments_list:
  324. seg_i, frag_i = fragments_list.pop(0)
  325. name = 'Seg%d-Frag%d' % (seg_i, frag_i)
  326. url = base_url + name
  327. if akamai_pv:
  328. url += '?' + akamai_pv.strip(';')
  329. if info_dict.get('extra_param_to_segment_url'):
  330. url += info_dict.get('extra_param_to_segment_url')
  331. frag_filename = '%s-%s' % (tmpfilename, name)
  332. try:
  333. success = http_dl.download(frag_filename, {'url': url})
  334. if not success:
  335. return False
  336. with open(frag_filename, 'rb') as down:
  337. down_data = down.read()
  338. reader = FlvReader(down_data)
  339. while True:
  340. _, box_type, box_data = reader.read_box_info()
  341. if box_type == b'mdat':
  342. dest_stream.write(box_data)
  343. break
  344. if live:
  345. os.remove(frag_filename)
  346. else:
  347. frags_filenames.append(frag_filename)
  348. except (compat_urllib_error.HTTPError, ) as err:
  349. if live and (err.code == 404 or err.code == 410):
  350. # We didn't keep up with the live window. Continue
  351. # with the next available fragment.
  352. msg = 'Fragment %d unavailable' % frag_i
  353. self.report_warning(msg)
  354. fragments_list = []
  355. else:
  356. raise
  357. if not fragments_list and live and bootstrap_url:
  358. fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
  359. total_frags += len(fragments_list)
  360. if fragments_list and (fragments_list[0][1] > frag_i + 1):
  361. msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
  362. self.report_warning(msg)
  363. dest_stream.close()
  364. elapsed = time.time() - start
  365. self.try_rename(tmpfilename, filename)
  366. for frag_file in frags_filenames:
  367. os.remove(frag_file)
  368. fsize = os.path.getsize(encodeFilename(filename))
  369. self._hook_progress({
  370. 'downloaded_bytes': fsize,
  371. 'total_bytes': fsize,
  372. 'filename': filename,
  373. 'status': 'finished',
  374. 'elapsed': elapsed,
  375. })
  376. return True