You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

383 lines
12 KiB

10 years ago
11 years ago
11 years ago
11 years ago
10 years ago
  1. from __future__ import division, unicode_literals
  2. import base64
  3. import io
  4. import itertools
  5. import os
  6. import time
  7. import xml.etree.ElementTree as etree
  8. from .fragment import FragmentFD
  9. from ..compat import (
  10. compat_urlparse,
  11. compat_urllib_error,
  12. )
  13. from ..utils import (
  14. encodeFilename,
  15. sanitize_open,
  16. struct_pack,
  17. struct_unpack,
  18. xpath_text,
  19. )
  20. class FlvReader(io.BytesIO):
  21. """
  22. Reader for Flv files
  23. The file format is documented in https://www.adobe.com/devnet/f4v.html
  24. """
  25. # Utility functions for reading numbers and strings
  26. def read_unsigned_long_long(self):
  27. return struct_unpack('!Q', self.read(8))[0]
  28. def read_unsigned_int(self):
  29. return struct_unpack('!I', self.read(4))[0]
  30. def read_unsigned_char(self):
  31. return struct_unpack('!B', self.read(1))[0]
  32. def read_string(self):
  33. res = b''
  34. while True:
  35. char = self.read(1)
  36. if char == b'\x00':
  37. break
  38. res += char
  39. return res
  40. def read_box_info(self):
  41. """
  42. Read a box and return the info as a tuple: (box_size, box_type, box_data)
  43. """
  44. real_size = size = self.read_unsigned_int()
  45. box_type = self.read(4)
  46. header_end = 8
  47. if size == 1:
  48. real_size = self.read_unsigned_long_long()
  49. header_end = 16
  50. return real_size, box_type, self.read(real_size - header_end)
  51. def read_asrt(self):
  52. # version
  53. self.read_unsigned_char()
  54. # flags
  55. self.read(3)
  56. quality_entry_count = self.read_unsigned_char()
  57. # QualityEntryCount
  58. for i in range(quality_entry_count):
  59. self.read_string()
  60. segment_run_count = self.read_unsigned_int()
  61. segments = []
  62. for i in range(segment_run_count):
  63. first_segment = self.read_unsigned_int()
  64. fragments_per_segment = self.read_unsigned_int()
  65. segments.append((first_segment, fragments_per_segment))
  66. return {
  67. 'segment_run': segments,
  68. }
  69. def read_afrt(self):
  70. # version
  71. self.read_unsigned_char()
  72. # flags
  73. self.read(3)
  74. # time scale
  75. self.read_unsigned_int()
  76. quality_entry_count = self.read_unsigned_char()
  77. # QualitySegmentUrlModifiers
  78. for i in range(quality_entry_count):
  79. self.read_string()
  80. fragments_count = self.read_unsigned_int()
  81. fragments = []
  82. for i in range(fragments_count):
  83. first = self.read_unsigned_int()
  84. first_ts = self.read_unsigned_long_long()
  85. duration = self.read_unsigned_int()
  86. if duration == 0:
  87. discontinuity_indicator = self.read_unsigned_char()
  88. else:
  89. discontinuity_indicator = None
  90. fragments.append({
  91. 'first': first,
  92. 'ts': first_ts,
  93. 'duration': duration,
  94. 'discontinuity_indicator': discontinuity_indicator,
  95. })
  96. return {
  97. 'fragments': fragments,
  98. }
  99. def read_abst(self):
  100. # version
  101. self.read_unsigned_char()
  102. # flags
  103. self.read(3)
  104. self.read_unsigned_int() # BootstrapinfoVersion
  105. # Profile,Live,Update,Reserved
  106. flags = self.read_unsigned_char()
  107. live = flags & 0x20 != 0
  108. # time scale
  109. self.read_unsigned_int()
  110. # CurrentMediaTime
  111. self.read_unsigned_long_long()
  112. # SmpteTimeCodeOffset
  113. self.read_unsigned_long_long()
  114. self.read_string() # MovieIdentifier
  115. server_count = self.read_unsigned_char()
  116. # ServerEntryTable
  117. for i in range(server_count):
  118. self.read_string()
  119. quality_count = self.read_unsigned_char()
  120. # QualityEntryTable
  121. for i in range(quality_count):
  122. self.read_string()
  123. # DrmData
  124. self.read_string()
  125. # MetaData
  126. self.read_string()
  127. segments_count = self.read_unsigned_char()
  128. segments = []
  129. for i in range(segments_count):
  130. box_size, box_type, box_data = self.read_box_info()
  131. assert box_type == b'asrt'
  132. segment = FlvReader(box_data).read_asrt()
  133. segments.append(segment)
  134. fragments_run_count = self.read_unsigned_char()
  135. fragments = []
  136. for i in range(fragments_run_count):
  137. box_size, box_type, box_data = self.read_box_info()
  138. assert box_type == b'afrt'
  139. fragments.append(FlvReader(box_data).read_afrt())
  140. return {
  141. 'segments': segments,
  142. 'fragments': fragments,
  143. 'live': live,
  144. }
  145. def read_bootstrap_info(self):
  146. total_size, box_type, box_data = self.read_box_info()
  147. assert box_type == b'abst'
  148. return FlvReader(box_data).read_abst()
  149. def read_bootstrap_info(bootstrap_bytes):
  150. return FlvReader(bootstrap_bytes).read_bootstrap_info()
  151. def build_fragments_list(boot_info):
  152. """ Return a list of (segment, fragment) for each fragment in the video """
  153. res = []
  154. segment_run_table = boot_info['segments'][0]
  155. fragment_run_entry_table = boot_info['fragments'][0]['fragments']
  156. first_frag_number = fragment_run_entry_table[0]['first']
  157. fragments_counter = itertools.count(first_frag_number)
  158. for segment, fragments_count in segment_run_table['segment_run']:
  159. for _ in range(fragments_count):
  160. res.append((segment, next(fragments_counter)))
  161. if boot_info['live']:
  162. res = res[-2:]
  163. return res
  164. def write_unsigned_int(stream, val):
  165. stream.write(struct_pack('!I', val))
  166. def write_unsigned_int_24(stream, val):
  167. stream.write(struct_pack('!I', val)[1:])
  168. def write_flv_header(stream):
  169. """Writes the FLV header to stream"""
  170. # FLV header
  171. stream.write(b'FLV\x01')
  172. stream.write(b'\x05')
  173. stream.write(b'\x00\x00\x00\x09')
  174. stream.write(b'\x00\x00\x00\x00')
  175. def write_metadata_tag(stream, metadata):
  176. """Writes optional metadata tag to stream"""
  177. SCRIPT_TAG = b'\x12'
  178. FLV_TAG_HEADER_LEN = 11
  179. if metadata:
  180. stream.write(SCRIPT_TAG)
  181. write_unsigned_int_24(stream, len(metadata))
  182. stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
  183. stream.write(metadata)
  184. write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata))
  185. def _add_ns(prop):
  186. return '{http://ns.adobe.com/f4m/1.0}%s' % prop
  187. class F4mFD(FragmentFD):
  188. """
  189. A downloader for f4m manifests or AdobeHDS.
  190. """
  191. FD_NAME = 'f4m'
  192. def _get_unencrypted_media(self, doc):
  193. media = doc.findall(_add_ns('media'))
  194. if not media:
  195. self.report_error('No media found')
  196. for e in (doc.findall(_add_ns('drmAdditionalHeader')) +
  197. doc.findall(_add_ns('drmAdditionalHeaderSet'))):
  198. # If id attribute is missing it's valid for all media nodes
  199. # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
  200. if 'id' not in e.attrib:
  201. self.report_error('Missing ID in f4m DRM')
  202. media = list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and
  203. 'drmAdditionalHeaderSetId' not in e.attrib,
  204. media))
  205. if not media:
  206. self.report_error('Unsupported DRM')
  207. return media
  208. def _get_bootstrap_from_url(self, bootstrap_url):
  209. bootstrap = self.ydl.urlopen(bootstrap_url).read()
  210. return read_bootstrap_info(bootstrap)
  211. def _update_live_fragments(self, bootstrap_url, latest_fragment):
  212. fragments_list = []
  213. retries = 30
  214. while (not fragments_list) and (retries > 0):
  215. boot_info = self._get_bootstrap_from_url(bootstrap_url)
  216. fragments_list = build_fragments_list(boot_info)
  217. fragments_list = [f for f in fragments_list if f[1] > latest_fragment]
  218. if not fragments_list:
  219. # Retry after a while
  220. time.sleep(5.0)
  221. retries -= 1
  222. if not fragments_list:
  223. self.report_error('Failed to update fragments')
  224. return fragments_list
  225. def _parse_bootstrap_node(self, node, base_url):
  226. if node.text is None:
  227. bootstrap_url = compat_urlparse.urljoin(
  228. base_url, node.attrib['url'])
  229. boot_info = self._get_bootstrap_from_url(bootstrap_url)
  230. else:
  231. bootstrap_url = None
  232. bootstrap = base64.b64decode(node.text.encode('ascii'))
  233. boot_info = read_bootstrap_info(bootstrap)
  234. return (boot_info, bootstrap_url)
  235. def real_download(self, filename, info_dict):
  236. man_url = info_dict['url']
  237. requested_bitrate = info_dict.get('tbr')
  238. self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
  239. manifest = self.ydl.urlopen(man_url).read()
  240. doc = etree.fromstring(manifest)
  241. formats = [(int(f.attrib.get('bitrate', -1)), f)
  242. for f in self._get_unencrypted_media(doc)]
  243. if requested_bitrate is None:
  244. # get the best format
  245. formats = sorted(formats, key=lambda f: f[0])
  246. rate, media = formats[-1]
  247. else:
  248. rate, media = list(filter(
  249. lambda f: int(f[0]) == requested_bitrate, formats))[0]
  250. base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
  251. bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
  252. boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, base_url)
  253. live = boot_info['live']
  254. metadata_node = media.find(_add_ns('metadata'))
  255. if metadata_node is not None:
  256. metadata = base64.b64decode(metadata_node.text.encode('ascii'))
  257. else:
  258. metadata = None
  259. fragments_list = build_fragments_list(boot_info)
  260. if self.params.get('test', False):
  261. # We only download the first fragment
  262. fragments_list = fragments_list[:1]
  263. total_frags = len(fragments_list)
  264. # For some akamai manifests we'll need to add a query to the fragment url
  265. akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
  266. ctx = {
  267. 'filename': filename,
  268. 'total_frags': total_frags,
  269. }
  270. self._prepare_frag_download(ctx)
  271. dest_stream = ctx['dest_stream']
  272. write_flv_header(dest_stream)
  273. if not live:
  274. write_metadata_tag(dest_stream, metadata)
  275. self._start_frag_download(ctx)
  276. frags_filenames = []
  277. while fragments_list:
  278. seg_i, frag_i = fragments_list.pop(0)
  279. name = 'Seg%d-Frag%d' % (seg_i, frag_i)
  280. url = base_url + name
  281. if akamai_pv:
  282. url += '?' + akamai_pv.strip(';')
  283. if info_dict.get('extra_param_to_segment_url'):
  284. url += info_dict.get('extra_param_to_segment_url')
  285. frag_filename = '%s-%s' % (ctx['tmpfilename'], name)
  286. try:
  287. success = ctx['dl'].download(frag_filename, {'url': url})
  288. if not success:
  289. return False
  290. (down, frag_sanitized) = sanitize_open(frag_filename, 'rb')
  291. down_data = down.read()
  292. down.close()
  293. reader = FlvReader(down_data)
  294. while True:
  295. _, box_type, box_data = reader.read_box_info()
  296. if box_type == b'mdat':
  297. dest_stream.write(box_data)
  298. break
  299. if live:
  300. os.remove(encodeFilename(frag_sanitized))
  301. else:
  302. frags_filenames.append(frag_sanitized)
  303. except (compat_urllib_error.HTTPError, ) as err:
  304. if live and (err.code == 404 or err.code == 410):
  305. # We didn't keep up with the live window. Continue
  306. # with the next available fragment.
  307. msg = 'Fragment %d unavailable' % frag_i
  308. self.report_warning(msg)
  309. fragments_list = []
  310. else:
  311. raise
  312. if not fragments_list and live and bootstrap_url:
  313. fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
  314. total_frags += len(fragments_list)
  315. if fragments_list and (fragments_list[0][1] > frag_i + 1):
  316. msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
  317. self.report_warning(msg)
  318. self._finish_frag_download(ctx)
  319. for frag_file in frags_filenames:
  320. os.remove(encodeFilename(frag_file))
  321. return True