You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

521 lines
18 KiB

11 years ago
11 years ago
  1. import os
  2. import subprocess
  3. import sys
  4. import time
  5. from .utils import (
  6. compat_subprocess_get_DEVNULL,
  7. encodeFilename,
  8. PostProcessingError,
  9. shell_quote,
  10. subtitles_filename,
  11. prepend_extension,
  12. )
  class PostProcessor(object):
      """Base class for post processors.

      A downloader accepts PostProcessor objects through its
      add_post_processor() method. After a successful download it walks
      its internal chain of PostProcessors, calling run() on each: the
      first one receives an initial argument, every later one receives
      the value returned by its predecessor.

      The chain stops as soon as one of them returns None or when the
      last PostProcessor has run.

      Like InfoExtractor objects, PostProcessor objects take part in a
      "mutual registration" process with the downloader.
      """

      _downloader = None

      def __init__(self, downloader=None):
          self._downloader = downloader

      def set_downloader(self, downloader):
          """Attach the downloader this PP belongs to."""
          self._downloader = downloader

      def run(self, information):
          """Process a downloaded file.

          "information" is a dictionary like those built by
          InfoExtractors, with one additional "filepath" field that
          points to the downloaded file.

          Returns a tuple: the first element says whether the original
          file should be kept (i.e. not deleted; None means no
          preference), the second is the updated information.

          A PostProcessingError may be raised if post processing fails.
          """
          # Base implementation: no preference about the file, nothing changed.
          keep_original = None
          return keep_original, information
  class FFmpegPostProcessorError(PostProcessingError):
      """Raised when ffmpeg/avconv is missing or exits with a non-zero status."""
  class AudioConversionError(PostProcessingError):
      """Raised when extracting or converting the audio track fails."""
  class FFmpegPostProcessor(PostProcessor):
      """Post processor base that shells out to ffmpeg or avconv."""

      def __init__(self,downloader=None):
          PostProcessor.__init__(self, downloader)
          # Maps each tool name to itself when runnable, to False otherwise.
          self._exes = self.detect_executables()

      @staticmethod
      def detect_executables():
          """Probe for avprobe, avconv, ffmpeg and ffprobe.

          Returns a dict keyed by program name; the value is the program
          name when `<program> -version` could be launched, else False.
          """
          def executable(exe):
              try:
                  probe = subprocess.Popen(
                      [exe, '-version'],
                      stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                  probe.communicate()
              except OSError:
                  return False
              else:
                  return exe

          found = {}
          for program in ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']:
              found[program] = executable(program)
          return found

      def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
          """Run avconv (preferred) or ffmpeg over several inputs.

          Builds `<exe> -y -i in1 -i in2 ... <opts> <out_path>` and waits
          for it to finish.

          Raises FFmpegPostProcessorError when neither executable was
          detected, or when the process exits with a non-zero status (the
          message is then the last line of its stderr).
          """
          converter = self._exes['avconv'] or self._exes['ffmpeg']
          if not converter:
              raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')

          input_args = []
          for in_path in input_paths:
              input_args += ['-i', encodeFilename(in_path, True)]
          destination = encodeFilename(self._ffmpeg_filename_argument(out_path), True)
          cmd = [converter, '-y'] + input_args + opts + [destination]

          if self._downloader.params.get('verbose', False):
              self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))

          proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
          _, err_output = proc.communicate()
          if proc.returncode == 0:
              return

          # Only the last stderr line is surfaced as the error message.
          err_text = err_output.decode('utf-8', 'replace')
          raise FFmpegPostProcessorError(err_text.strip().split('\n')[-1])

      def run_ffmpeg(self, path, out_path, opts):
          """Single-input convenience wrapper around run_ffmpeg_multiple_files()."""
          self.run_ffmpeg_multiple_files([path], out_path, opts)

      def _ffmpeg_filename_argument(self, fn):
          """Return fn in a form that is not mistaken for a command line option."""
          # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
          return u'./' + fn if fn.startswith(u'-') else fn
  class FFmpegExtractAudioPP(FFmpegPostProcessor):
      """Extract (and, if needed, transcode) the audio track of a download."""

      def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
          FFmpegPostProcessor.__init__(self, downloader)
          if preferredcodec is None:
              preferredcodec = 'best'
          self._preferredcodec = preferredcodec
          self._preferredquality = preferredquality
          self._nopostoverwrites = nopostoverwrites

      def get_audio_codec(self, path):
          """Return the audio codec name reported by avprobe/ffprobe, or None.

          Scans the `-show_streams` output: the most recent `codec_name=`
          value is returned as soon as a `codec_type=audio` line is seen.
          None is returned when the probe cannot be run, exits with a
          non-zero status, or reports no audio stream.

          Raises PostProcessingError when neither probe tool was detected.
          """
          if not self._exes['ffprobe'] and not self._exes['avprobe']:
              raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
          try:
              cmd = [
                  self._exes['avprobe'] or self._exes['ffprobe'],
                  '-show_streams',
                  encodeFilename(self._ffmpeg_filename_argument(path), True)]
              handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
              output = handle.communicate()[0]
              if handle.wait() != 0:
                  return None
          except (IOError, OSError):
              return None
          audio_codec = None
          for line in output.decode('ascii', 'ignore').split('\n'):
              if line.startswith('codec_name='):
                  audio_codec = line.split('=')[1].strip()
              elif line.strip() == 'codec_type=audio' and audio_codec is not None:
                  return audio_codec
          return None

      def run_ffmpeg(self, path, out_path, codec, more_opts):
          """Run the converter with video disabled (-vn) and the given audio codec.

          codec=None means no `-acodec` option is passed. Failures of the
          underlying tool are re-raised as AudioConversionError.
          """
          if not self._exes['ffmpeg'] and not self._exes['avconv']:
              raise AudioConversionError('ffmpeg or avconv not found. Please install one.')
          if codec is None:
              acodec_opts = []
          else:
              acodec_opts = ['-acodec', codec]
          opts = ['-vn'] + acodec_opts + more_opts
          try:
              FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
          except FFmpegPostProcessorError as err:
              raise AudioConversionError(err.msg)

      def _quality_opts(self, allow_vbr=True):
          """Return the converter options for the preferred quality ([] if unset).

          Values below 10 are treated as a VBR quality level (unless
          allow_vbr is False); anything else is a bitrate in kbit/s.
          """
          quality = self._preferredquality
          if quality is None:
              return []
          uses_avconv = self._exes['avconv']
          if int(quality) < 10 and allow_vbr:
              return ['-q:a' if uses_avconv else '-aq', quality]
          return ['-b:a' if uses_avconv else '-ab', quality + 'k']

      def run(self, information):
          """Extract the audio of information['filepath'] into a new file.

          Returns (keep_original, information) with information['filepath']
          pointing at the audio file. keep_original is the configured
          nopostoverwrites value, forced to True when the audio file has
          the same path as the input.

          Raises PostProcessingError when the codec cannot be probed or
          the conversion fails.
          """
          path = information['filepath']

          filecodec = self.get_audio_codec(path)
          if filecodec is None:
              raise PostProcessingError(u'WARNING: unable to obtain file audio codec with ffprobe')

          exe_name = 'avconv' if self._exes['avconv'] else 'ffmpeg'
          bsf_flag = '-bsf:a' if self._exes['avconv'] else '-absf'

          more_opts = []
          if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
              if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
                  # Lossless, but in another container
                  acodec = 'copy'
                  extension = 'm4a'
                  more_opts = [bsf_flag, 'aac_adtstoasc']
              elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
                  # Lossless if possible
                  acodec = 'copy'
                  extension = filecodec
                  if filecodec == 'aac':
                      more_opts = ['-f', 'adts']
                  if filecodec == 'vorbis':
                      extension = 'ogg'
              else:
                  # MP3 otherwise.
                  acodec = 'libmp3lame'
                  extension = 'mp3'
                  more_opts = self._quality_opts()
          else:
              # We convert the audio (lossy)
              acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
              extension = self._preferredcodec
              # The opus codec doesn't support the -aq option
              more_opts = self._quality_opts(allow_vbr=(self._preferredcodec != 'opus'))
              if self._preferredcodec == 'aac':
                  more_opts += ['-f', 'adts']
              if self._preferredcodec == 'm4a':
                  more_opts += [bsf_flag, 'aac_adtstoasc']
              if self._preferredcodec == 'vorbis':
                  extension = 'ogg'
              if self._preferredcodec == 'wav':
                  extension = 'wav'
                  more_opts += ['-f', 'wav']

          # not os.path.splitext, since the latter does not work on unicode in all setups
          prefix, sep, _ = path.rpartition(u'.')
          new_path = prefix + sep + extension

          # If we download foo.mp3 and convert it to... foo.mp3, then don't
          # delete foo.mp3. This is decided per run in a local variable so
          # that one such file does not change how later files are handled.
          nopostoverwrites = self._nopostoverwrites
          if new_path == path:
              nopostoverwrites = True

          try:
              if nopostoverwrites and os.path.exists(encodeFilename(new_path)):
                  self._downloader.to_screen(u'[youtube] Post-process file %s exists, skipping' % new_path)
              else:
                  self._downloader.to_screen(u'[' + exe_name + '] Destination: ' + new_path)
                  self.run_ffmpeg(path, new_path, acodec, more_opts)
          except AudioConversionError as err:
              raise PostProcessingError(u'audio conversion failed: ' + err.msg)
          except Exception:
              # Exception (not a bare except) so that KeyboardInterrupt and
              # SystemExit propagate instead of being reported as tool errors.
              raise PostProcessingError(u'error running ' + exe_name)

          # Try to update the date time for extracted audio file.
          if information.get('filetime') is not None:
              try:
                  os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
              except Exception:
                  # Best effort only: a failed utime must not fail the download.
                  self._downloader.report_warning(u'Cannot update utime of audio file')

          information['filepath'] = new_path
          return nopostoverwrites, information
  class FFmpegVideoConvertor(FFmpegPostProcessor):
      """Convert the downloaded video into a preferred container format."""

      def __init__(self, downloader=None,preferedformat=None):
          super(FFmpegVideoConvertor, self).__init__(downloader)
          self._preferedformat=preferedformat

      def run(self, information):
          """Convert information['filepath'] to the preferred format.

          Returns (True, information) untouched when the file already has
          the target extension. Otherwise runs the converter, updates
          'filepath', 'format' and 'ext', and returns (False, information).
          """
          path = information['filepath']
          target_format = self._preferedformat
          stem, dot, _ = path.rpartition(u'.')
          outpath = stem + dot + target_format

          if information['ext'] == target_format:
              self._downloader.to_screen(u'[ffmpeg] Not converting video file %s - already is in target format %s' % (path, target_format))
              return True,information

          details = '] Converting video from %s to %s, Destination: ' % (information['ext'], target_format)
          self._downloader.to_screen(u'[' + 'ffmpeg' + details + outpath)
          self.run_ffmpeg(path, outpath, [])

          information['filepath'] = outpath
          information['format'] = target_format
          information['ext'] = target_format
          return False,information
  class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
      """Embed downloaded subtitle files into an mp4 video."""

      # Two-letter to three-letter language codes.
      # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
      _lang_map = {
          'aa': 'aar', 'ab': 'abk', 'ae': 'ave', 'af': 'afr', 'ak': 'aka',
          'am': 'amh', 'an': 'arg', 'ar': 'ara', 'as': 'asm', 'av': 'ava',
          'ay': 'aym', 'az': 'aze',
          'ba': 'bak', 'be': 'bel', 'bg': 'bul', 'bh': 'bih', 'bi': 'bis',
          'bm': 'bam', 'bn': 'ben', 'bo': 'bod', 'br': 'bre', 'bs': 'bos',
          'ca': 'cat', 'ce': 'che', 'ch': 'cha', 'co': 'cos', 'cr': 'cre',
          'cs': 'ces', 'cu': 'chu', 'cv': 'chv', 'cy': 'cym',
          'da': 'dan', 'de': 'deu', 'dv': 'div', 'dz': 'dzo',
          'ee': 'ewe', 'el': 'ell', 'en': 'eng', 'eo': 'epo', 'es': 'spa',
          'et': 'est', 'eu': 'eus',
          'fa': 'fas', 'ff': 'ful', 'fi': 'fin', 'fj': 'fij', 'fo': 'fao',
          'fr': 'fra', 'fy': 'fry',
          'ga': 'gle', 'gd': 'gla', 'gl': 'glg', 'gn': 'grn', 'gu': 'guj',
          'gv': 'glv',
          'ha': 'hau', 'he': 'heb', 'hi': 'hin', 'ho': 'hmo', 'hr': 'hrv',
          'ht': 'hat', 'hu': 'hun', 'hy': 'hye', 'hz': 'her',
          'ia': 'ina', 'id': 'ind', 'ie': 'ile', 'ig': 'ibo', 'ii': 'iii',
          'ik': 'ipk', 'io': 'ido', 'is': 'isl', 'it': 'ita', 'iu': 'iku',
          'ja': 'jpn', 'jv': 'jav',
          'ka': 'kat', 'kg': 'kon', 'ki': 'kik', 'kj': 'kua', 'kk': 'kaz',
          'kl': 'kal', 'km': 'khm', 'kn': 'kan', 'ko': 'kor', 'kr': 'kau',
          'ks': 'kas', 'ku': 'kur', 'kv': 'kom', 'kw': 'cor', 'ky': 'kir',
          'la': 'lat', 'lb': 'ltz', 'lg': 'lug', 'li': 'lim', 'ln': 'lin',
          'lo': 'lao', 'lt': 'lit', 'lu': 'lub', 'lv': 'lav',
          'mg': 'mlg', 'mh': 'mah', 'mi': 'mri', 'mk': 'mkd', 'ml': 'mal',
          'mn': 'mon', 'mr': 'mar', 'ms': 'msa', 'mt': 'mlt', 'my': 'mya',
          'na': 'nau', 'nb': 'nob', 'nd': 'nde', 'ne': 'nep', 'ng': 'ndo',
          'nl': 'nld', 'nn': 'nno', 'no': 'nor', 'nr': 'nbl', 'nv': 'nav',
          'ny': 'nya',
          'oc': 'oci', 'oj': 'oji', 'om': 'orm', 'or': 'ori', 'os': 'oss',
          'pa': 'pan', 'pi': 'pli', 'pl': 'pol', 'ps': 'pus', 'pt': 'por',
          'qu': 'que',
          'rm': 'roh', 'rn': 'run', 'ro': 'ron', 'ru': 'rus', 'rw': 'kin',
          'sa': 'san', 'sc': 'srd', 'sd': 'snd', 'se': 'sme', 'sg': 'sag',
          'si': 'sin', 'sk': 'slk', 'sl': 'slv', 'sm': 'smo', 'sn': 'sna',
          'so': 'som', 'sq': 'sqi', 'sr': 'srp', 'ss': 'ssw', 'st': 'sot',
          'su': 'sun', 'sv': 'swe', 'sw': 'swa',
          'ta': 'tam', 'te': 'tel', 'tg': 'tgk', 'th': 'tha', 'ti': 'tir',
          'tk': 'tuk', 'tl': 'tgl', 'tn': 'tsn', 'to': 'ton', 'tr': 'tur',
          'ts': 'tso', 'tt': 'tat', 'tw': 'twi', 'ty': 'tah',
          'ug': 'uig', 'uk': 'ukr', 'ur': 'urd', 'uz': 'uzb',
          've': 'ven', 'vi': 'vie', 'vo': 'vol',
          'wa': 'wln', 'wo': 'wol',
          'xh': 'xho',
          'yi': 'yid', 'yo': 'yor',
          'za': 'zha', 'zh': 'zho', 'zu': 'zul',
      }

      def __init__(self, downloader=None, subtitlesformat='srt'):
          super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
          self._subformat = subtitlesformat

      @classmethod
      def _conver_lang_code(cls, code):
          """Convert language code from ISO 639-1 to ISO 639-2/T"""
          # Only the first two characters are looked up; unknown codes give None.
          two_letter = code[:2]
          return cls._lang_map.get(two_letter)

      def run(self, information):
          """Mux every subtitle file of the video into the mp4 as mov_text.

          Does nothing (beyond a screen message) when the file is not an
          mp4 or has no subtitles. Otherwise the converter writes a
          temporary file which then replaces the original. Always returns
          (True, information).
          """
          if information['ext'] != u'mp4':
              self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
              return True, information
          if not information.get('subtitles'):
              self._downloader.to_screen(u'[ffmpeg] There aren\'t any subtitles to embed')
              return True, information

          sub_langs = list(information['subtitles'])
          filename = information['filepath']

          # Input 0 is the video; inputs 1..n are the subtitle files.
          input_files = [filename]
          for lang in sub_langs:
              input_files.append(subtitles_filename(filename, lang, self._subformat))

          opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
          for index, lang in enumerate(sub_langs):
              opts += ['-map', '%d:0' % (index + 1), '-c:s:%d' % index, 'mov_text']
              iso_code = self._conver_lang_code(lang)
              if iso_code is None:
                  continue
              opts += ['-metadata:s:s:%d' % index, 'language=%s' % iso_code]
          opts += ['-f', 'mp4']

          temp_filename = filename + u'.temp'
          self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename)
          self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)

          # Swap the muxed file into place of the original.
          os.remove(encodeFilename(filename))
          os.rename(encodeFilename(temp_filename), encodeFilename(filename))

          return True, information
  class FFmpegMetadataPP(FFmpegPostProcessor):
      """Write title, date and artist tags into the downloaded file."""

      def run(self, info):
          """Add the available metadata to info['filepath'] in place.

          'title' and 'upload_date' map to the title and date tags; the
          artist tag comes from 'uploader', falling back to 'uploader_id'.
          Always returns (True, info).
          """
          metadata = {}

          title = info.get('title')
          if title is not None:
              metadata['title'] = title

          upload_date = info.get('upload_date')
          if upload_date is not None:
              metadata['date'] = upload_date

          artist = info.get('uploader')
          if artist is None:
              artist = info.get('uploader_id')
          if artist is not None:
              metadata['artist'] = artist

          if not metadata:
              self._downloader.to_screen(u'[ffmpeg] There isn\'t any metadata to add')
              return True, info

          filename = info['filepath']
          temp_filename = prepend_extension(filename, 'temp')

          # Streams are copied as-is; only the container tags change.
          options = ['-c', 'copy']
          for name, value in metadata.items():
              options += ['-metadata', '%s=%s' % (name, value)]

          self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)
          self.run_ffmpeg(filename, temp_filename, options)

          # Replace the original with the tagged copy.
          os.remove(encodeFilename(filename))
          os.rename(encodeFilename(temp_filename), encodeFilename(filename))
          return True, info
  class FFmpegMergerPP(FFmpegPostProcessor):
      """Merge several downloaded files into one without re-encoding."""

      def run(self, info):
          """Mux info['__files_to_merge'] into info['filepath'] with stream copy.

          Always returns (True, info).
          """
          target = info['filepath']
          sources = info['__files_to_merge']
          self.run_ffmpeg_multiple_files(sources, target, ['-c', 'copy'])
          return True, info