You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

386 lines
12 KiB

10 years ago
11 years ago
11 years ago
11 years ago
10 years ago
10 years ago
  1. from __future__ import division, unicode_literals
  2. import base64
  3. import io
  4. import itertools
  5. import os
  6. import time
  7. import xml.etree.ElementTree as etree
  8. from .common import FileDownloader
  9. from .http import HttpFD
  10. from ..compat import (
  11. compat_urlparse,
  12. )
  13. from ..utils import (
  14. struct_pack,
  15. struct_unpack,
  16. encodeFilename,
  17. sanitize_open,
  18. xpath_text,
  19. )
  20. class FlvReader(io.BytesIO):
  21. """
  22. Reader for Flv files
  23. The file format is documented in https://www.adobe.com/devnet/f4v.html
  24. """
  25. # Utility functions for reading numbers and strings
  26. def read_unsigned_long_long(self):
  27. return struct_unpack('!Q', self.read(8))[0]
  28. def read_unsigned_int(self):
  29. return struct_unpack('!I', self.read(4))[0]
  30. def read_unsigned_char(self):
  31. return struct_unpack('!B', self.read(1))[0]
  32. def read_string(self):
  33. res = b''
  34. while True:
  35. char = self.read(1)
  36. if char == b'\x00':
  37. break
  38. res += char
  39. return res
  40. def read_box_info(self):
  41. """
  42. Read a box and return the info as a tuple: (box_size, box_type, box_data)
  43. """
  44. real_size = size = self.read_unsigned_int()
  45. box_type = self.read(4)
  46. header_end = 8
  47. if size == 1:
  48. real_size = self.read_unsigned_long_long()
  49. header_end = 16
  50. return real_size, box_type, self.read(real_size - header_end)
  51. def read_asrt(self):
  52. # version
  53. self.read_unsigned_char()
  54. # flags
  55. self.read(3)
  56. quality_entry_count = self.read_unsigned_char()
  57. # QualityEntryCount
  58. for i in range(quality_entry_count):
  59. self.read_string()
  60. segment_run_count = self.read_unsigned_int()
  61. segments = []
  62. for i in range(segment_run_count):
  63. first_segment = self.read_unsigned_int()
  64. fragments_per_segment = self.read_unsigned_int()
  65. segments.append((first_segment, fragments_per_segment))
  66. return {
  67. 'segment_run': segments,
  68. }
  69. def read_afrt(self):
  70. # version
  71. self.read_unsigned_char()
  72. # flags
  73. self.read(3)
  74. # time scale
  75. self.read_unsigned_int()
  76. quality_entry_count = self.read_unsigned_char()
  77. # QualitySegmentUrlModifiers
  78. for i in range(quality_entry_count):
  79. self.read_string()
  80. fragments_count = self.read_unsigned_int()
  81. fragments = []
  82. for i in range(fragments_count):
  83. first = self.read_unsigned_int()
  84. first_ts = self.read_unsigned_long_long()
  85. duration = self.read_unsigned_int()
  86. if duration == 0:
  87. discontinuity_indicator = self.read_unsigned_char()
  88. else:
  89. discontinuity_indicator = None
  90. fragments.append({
  91. 'first': first,
  92. 'ts': first_ts,
  93. 'duration': duration,
  94. 'discontinuity_indicator': discontinuity_indicator,
  95. })
  96. return {
  97. 'fragments': fragments,
  98. }
  99. def read_abst(self):
  100. # version
  101. self.read_unsigned_char()
  102. # flags
  103. self.read(3)
  104. self.read_unsigned_int() # BootstrapinfoVersion
  105. # Profile,Live,Update,Reserved
  106. self.read(1)
  107. # time scale
  108. self.read_unsigned_int()
  109. # CurrentMediaTime
  110. self.read_unsigned_long_long()
  111. # SmpteTimeCodeOffset
  112. self.read_unsigned_long_long()
  113. self.read_string() # MovieIdentifier
  114. server_count = self.read_unsigned_char()
  115. # ServerEntryTable
  116. for i in range(server_count):
  117. self.read_string()
  118. quality_count = self.read_unsigned_char()
  119. # QualityEntryTable
  120. for i in range(quality_count):
  121. self.read_string()
  122. # DrmData
  123. self.read_string()
  124. # MetaData
  125. self.read_string()
  126. segments_count = self.read_unsigned_char()
  127. segments = []
  128. for i in range(segments_count):
  129. box_size, box_type, box_data = self.read_box_info()
  130. assert box_type == b'asrt'
  131. segment = FlvReader(box_data).read_asrt()
  132. segments.append(segment)
  133. fragments_run_count = self.read_unsigned_char()
  134. fragments = []
  135. for i in range(fragments_run_count):
  136. box_size, box_type, box_data = self.read_box_info()
  137. assert box_type == b'afrt'
  138. fragments.append(FlvReader(box_data).read_afrt())
  139. return {
  140. 'segments': segments,
  141. 'fragments': fragments,
  142. }
  143. def read_bootstrap_info(self):
  144. total_size, box_type, box_data = self.read_box_info()
  145. assert box_type == b'abst'
  146. return FlvReader(box_data).read_abst()
  147. def read_bootstrap_info(bootstrap_bytes):
  148. return FlvReader(bootstrap_bytes).read_bootstrap_info()
  149. def build_fragments_list(boot_info):
  150. """ Return a list of (segment, fragment) for each fragment in the video """
  151. res = []
  152. segment_run_table = boot_info['segments'][0]
  153. fragment_run_entry_table = boot_info['fragments'][0]['fragments']
  154. first_frag_number = fragment_run_entry_table[0]['first']
  155. fragments_counter = itertools.count(first_frag_number)
  156. for segment, fragments_count in segment_run_table['segment_run']:
  157. for _ in range(fragments_count):
  158. res.append((segment, next(fragments_counter)))
  159. return res
  160. def write_unsigned_int(stream, val):
  161. stream.write(struct_pack('!I', val))
  162. def write_unsigned_int_24(stream, val):
  163. stream.write(struct_pack('!I', val)[1:])
  164. def write_flv_header(stream):
  165. """Writes the FLV header to stream"""
  166. # FLV header
  167. stream.write(b'FLV\x01')
  168. stream.write(b'\x05')
  169. stream.write(b'\x00\x00\x00\x09')
  170. stream.write(b'\x00\x00\x00\x00')
  171. def write_metadata_tag(stream, metadata):
  172. """Writes optional metadata tag to stream"""
  173. SCRIPT_TAG = b'\x12'
  174. FLV_TAG_HEADER_LEN = 11
  175. if metadata:
  176. stream.write(SCRIPT_TAG)
  177. write_unsigned_int_24(stream, len(metadata))
  178. stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
  179. stream.write(metadata)
  180. write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata))
  181. def _add_ns(prop):
  182. return '{http://ns.adobe.com/f4m/1.0}%s' % prop
  183. class HttpQuietDownloader(HttpFD):
  184. def to_screen(self, *args, **kargs):
  185. pass
  186. class F4mFD(FileDownloader):
  187. """
  188. A downloader for f4m manifests or AdobeHDS.
  189. """
  190. def _get_unencrypted_media(self, doc):
  191. media = doc.findall(_add_ns('media'))
  192. if not media:
  193. self.report_error('No media found')
  194. for e in (doc.findall(_add_ns('drmAdditionalHeader')) +
  195. doc.findall(_add_ns('drmAdditionalHeaderSet'))):
  196. # If id attribute is missing it's valid for all media nodes
  197. # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
  198. if 'id' not in e.attrib:
  199. self.report_error('Missing ID in f4m DRM')
  200. media = list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and
  201. 'drmAdditionalHeaderSetId' not in e.attrib,
  202. media))
  203. if not media:
  204. self.report_error('Unsupported DRM')
  205. return media
  206. def real_download(self, filename, info_dict):
  207. man_url = info_dict['url']
  208. requested_bitrate = info_dict.get('tbr')
  209. self.to_screen('[download] Downloading f4m manifest')
  210. manifest = self.ydl.urlopen(man_url).read()
  211. doc = etree.fromstring(manifest)
  212. formats = [(int(f.attrib.get('bitrate', -1)), f)
  213. for f in self._get_unencrypted_media(doc)]
  214. if requested_bitrate is None:
  215. # get the best format
  216. formats = sorted(formats, key=lambda f: f[0])
  217. rate, media = formats[-1]
  218. else:
  219. rate, media = list(filter(
  220. lambda f: int(f[0]) == requested_bitrate, formats))[0]
  221. base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
  222. bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
  223. if bootstrap_node.text is None:
  224. bootstrap_url = compat_urlparse.urljoin(
  225. base_url, bootstrap_node.attrib['url'])
  226. bootstrap = self.ydl.urlopen(bootstrap_url).read()
  227. else:
  228. bootstrap = base64.b64decode(bootstrap_node.text)
  229. metadata_node = media.find(_add_ns('metadata'))
  230. if metadata_node is not None:
  231. metadata = base64.b64decode(metadata_node.text)
  232. else:
  233. metadata = None
  234. boot_info = read_bootstrap_info(bootstrap)
  235. fragments_list = build_fragments_list(boot_info)
  236. if self.params.get('test', False):
  237. # We only download the first fragment
  238. fragments_list = fragments_list[:1]
  239. total_frags = len(fragments_list)
  240. # For some akamai manifests we'll need to add a query to the fragment url
  241. akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
  242. self.report_destination(filename)
  243. http_dl = HttpQuietDownloader(
  244. self.ydl,
  245. {
  246. 'continuedl': True,
  247. 'quiet': True,
  248. 'noprogress': True,
  249. 'ratelimit': self.params.get('ratelimit', None),
  250. 'test': self.params.get('test', False),
  251. }
  252. )
  253. tmpfilename = self.temp_name(filename)
  254. (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
  255. write_flv_header(dest_stream)
  256. write_metadata_tag(dest_stream, metadata)
  257. # This dict stores the download progress, it's updated by the progress
  258. # hook
  259. state = {
  260. 'status': 'downloading',
  261. 'downloaded_bytes': 0,
  262. 'frag_index': 0,
  263. 'frag_count': total_frags,
  264. 'filename': filename,
  265. 'tmpfilename': tmpfilename,
  266. }
  267. start = time.time()
  268. def frag_progress_hook(s):
  269. if s['status'] not in ('downloading', 'finished'):
  270. return
  271. frag_total_bytes = s.get('total_bytes', 0)
  272. if s['status'] == 'finished':
  273. state['downloaded_bytes'] += frag_total_bytes
  274. state['frag_index'] += 1
  275. estimated_size = (
  276. (state['downloaded_bytes'] + frag_total_bytes) /
  277. (state['frag_index'] + 1) * total_frags)
  278. time_now = time.time()
  279. state['total_bytes_estimate'] = estimated_size
  280. state['elapsed'] = time_now - start
  281. if s['status'] == 'finished':
  282. progress = self.calc_percent(state['frag_index'], total_frags)
  283. else:
  284. frag_downloaded_bytes = s['downloaded_bytes']
  285. frag_progress = self.calc_percent(frag_downloaded_bytes,
  286. frag_total_bytes)
  287. progress = self.calc_percent(state['frag_index'], total_frags)
  288. progress += frag_progress / float(total_frags)
  289. state['eta'] = self.calc_eta(
  290. start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes)
  291. state['speed'] = s.get('speed')
  292. self._hook_progress(state)
  293. http_dl.add_progress_hook(frag_progress_hook)
  294. frags_filenames = []
  295. for (seg_i, frag_i) in fragments_list:
  296. name = 'Seg%d-Frag%d' % (seg_i, frag_i)
  297. url = base_url + name
  298. if akamai_pv:
  299. url += '?' + akamai_pv.strip(';')
  300. frag_filename = '%s-%s' % (tmpfilename, name)
  301. success = http_dl.download(frag_filename, {'url': url})
  302. if not success:
  303. return False
  304. with open(frag_filename, 'rb') as down:
  305. down_data = down.read()
  306. reader = FlvReader(down_data)
  307. while True:
  308. _, box_type, box_data = reader.read_box_info()
  309. if box_type == b'mdat':
  310. dest_stream.write(box_data)
  311. break
  312. frags_filenames.append(frag_filename)
  313. dest_stream.close()
  314. elapsed = time.time() - start
  315. self.try_rename(tmpfilename, filename)
  316. for frag_file in frags_filenames:
  317. os.remove(frag_file)
  318. fsize = os.path.getsize(encodeFilename(filename))
  319. self._hook_progress({
  320. 'downloaded_bytes': fsize,
  321. 'total_bytes': fsize,
  322. 'filename': filename,
  323. 'status': 'finished',
  324. 'elapsed': elapsed,
  325. })
  326. return True