You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

466 lines
16 KiB

  1. import os
  2. import subprocess
  3. import sys
  4. import time
  5. from .utils import *
  6. class PostProcessor(object):
  7. """Post Processor class.
  8. PostProcessor objects can be added to downloaders with their
  9. add_post_processor() method. When the downloader has finished a
  10. successful download, it will take its internal chain of PostProcessors
  11. and start calling the run() method on each one of them, first with
  12. an initial argument and then with the returned value of the previous
  13. PostProcessor.
  14. The chain will be stopped if one of them ever returns None or the end
  15. of the chain is reached.
  16. PostProcessor objects follow a "mutual registration" process similar
  17. to InfoExtractor objects.
  18. """
  19. _downloader = None
  20. def __init__(self, downloader=None):
  21. self._downloader = downloader
  22. def set_downloader(self, downloader):
  23. """Sets the downloader for this PP."""
  24. self._downloader = downloader
  25. def run(self, information):
  26. """Run the PostProcessor.
  27. The "information" argument is a dictionary like the ones
  28. composed by InfoExtractors. The only difference is that this
  29. one has an extra field called "filepath" that points to the
  30. downloaded file.
  31. This method returns a tuple, the first element of which describes
  32. whether the original file should be kept (i.e. not deleted - None for
  33. no preference), and the second of which is the updated information.
  34. In addition, this method may raise a PostProcessingError
  35. exception if post processing fails.
  36. """
  37. return None, information # by default, keep file and do nothing
  38. class FFmpegPostProcessorError(PostProcessingError):
  39. pass
  40. class AudioConversionError(PostProcessingError):
  41. pass
  42. class FFmpegPostProcessor(PostProcessor):
  43. def __init__(self,downloader=None):
  44. PostProcessor.__init__(self, downloader)
  45. self._exes = self.detect_executables()
  46. @staticmethod
  47. def detect_executables():
  48. def executable(exe):
  49. try:
  50. subprocess.Popen([exe, '-version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
  51. except OSError:
  52. return False
  53. return exe
  54. programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
  55. return dict((program, executable(program)) for program in programs)
  56. def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
  57. if not self._exes['ffmpeg'] and not self._exes['avconv']:
  58. raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
  59. files_cmd = []
  60. for path in input_paths:
  61. files_cmd.extend(['-i', encodeFilename(path)])
  62. cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y'] + files_cmd
  63. + opts +
  64. [encodeFilename(self._ffmpeg_filename_argument(out_path))])
  65. p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  66. stdout,stderr = p.communicate()
  67. if p.returncode != 0:
  68. stderr = stderr.decode('utf-8', 'replace')
  69. msg = stderr.strip().split('\n')[-1]
  70. raise FFmpegPostProcessorError(msg)
  71. def run_ffmpeg(self, path, out_path, opts):
  72. self.run_ffmpeg_multiple_files([path], out_path, opts)
  73. def _ffmpeg_filename_argument(self, fn):
  74. # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
  75. if fn.startswith(u'-'):
  76. return u'./' + fn
  77. return fn
  78. class FFmpegExtractAudioPP(FFmpegPostProcessor):
  79. def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
  80. FFmpegPostProcessor.__init__(self, downloader)
  81. if preferredcodec is None:
  82. preferredcodec = 'best'
  83. self._preferredcodec = preferredcodec
  84. self._preferredquality = preferredquality
  85. self._nopostoverwrites = nopostoverwrites
  86. def get_audio_codec(self, path):
  87. if not self._exes['ffprobe'] and not self._exes['avprobe']:
  88. raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
  89. try:
  90. cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))]
  91. handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
  92. output = handle.communicate()[0]
  93. if handle.wait() != 0:
  94. return None
  95. except (IOError, OSError):
  96. return None
  97. audio_codec = None
  98. for line in output.decode('ascii', 'ignore').split('\n'):
  99. if line.startswith('codec_name='):
  100. audio_codec = line.split('=')[1].strip()
  101. elif line.strip() == 'codec_type=audio' and audio_codec is not None:
  102. return audio_codec
  103. return None
  104. def run_ffmpeg(self, path, out_path, codec, more_opts):
  105. if not self._exes['ffmpeg'] and not self._exes['avconv']:
  106. raise AudioConversionError('ffmpeg or avconv not found. Please install one.')
  107. if codec is None:
  108. acodec_opts = []
  109. else:
  110. acodec_opts = ['-acodec', codec]
  111. opts = ['-vn'] + acodec_opts + more_opts
  112. try:
  113. FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
  114. except FFmpegPostProcessorError as err:
  115. raise AudioConversionError(err.msg)
  116. def run(self, information):
  117. path = information['filepath']
  118. filecodec = self.get_audio_codec(path)
  119. if filecodec is None:
  120. raise PostProcessingError(u'WARNING: unable to obtain file audio codec with ffprobe')
  121. more_opts = []
  122. if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
  123. if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
  124. # Lossless, but in another container
  125. acodec = 'copy'
  126. extension = 'm4a'
  127. more_opts = [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
  128. elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
  129. # Lossless if possible
  130. acodec = 'copy'
  131. extension = filecodec
  132. if filecodec == 'aac':
  133. more_opts = ['-f', 'adts']
  134. if filecodec == 'vorbis':
  135. extension = 'ogg'
  136. else:
  137. # MP3 otherwise.
  138. acodec = 'libmp3lame'
  139. extension = 'mp3'
  140. more_opts = []
  141. if self._preferredquality is not None:
  142. if int(self._preferredquality) < 10:
  143. more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality]
  144. else:
  145. more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']
  146. else:
  147. # We convert the audio (lossy)
  148. acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
  149. extension = self._preferredcodec
  150. more_opts = []
  151. if self._preferredquality is not None:
  152. if int(self._preferredquality) < 10:
  153. more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality]
  154. else:
  155. more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']
  156. if self._preferredcodec == 'aac':
  157. more_opts += ['-f', 'adts']
  158. if self._preferredcodec == 'm4a':
  159. more_opts += [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
  160. if self._preferredcodec == 'vorbis':
  161. extension = 'ogg'
  162. if self._preferredcodec == 'wav':
  163. extension = 'wav'
  164. more_opts += ['-f', 'wav']
  165. prefix, sep, ext = path.rpartition(u'.') # not os.path.splitext, since the latter does not work on unicode in all setups
  166. new_path = prefix + sep + extension
  167. # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
  168. if new_path == path:
  169. self._nopostoverwrites = True
  170. try:
  171. if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)):
  172. self._downloader.to_screen(u'[youtube] Post-process file %s exists, skipping' % new_path)
  173. else:
  174. self._downloader.to_screen(u'[' + (self._exes['avconv'] and 'avconv' or 'ffmpeg') + '] Destination: ' + new_path)
  175. self.run_ffmpeg(path, new_path, acodec, more_opts)
  176. except:
  177. etype,e,tb = sys.exc_info()
  178. if isinstance(e, AudioConversionError):
  179. msg = u'audio conversion failed: ' + e.msg
  180. else:
  181. msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg')
  182. raise PostProcessingError(msg)
  183. # Try to update the date time for extracted audio file.
  184. if information.get('filetime') is not None:
  185. try:
  186. os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
  187. except:
  188. self._downloader.report_warning(u'Cannot update utime of audio file')
  189. information['filepath'] = new_path
  190. return self._nopostoverwrites,information
  191. class FFmpegVideoConvertor(FFmpegPostProcessor):
  192. def __init__(self, downloader=None,preferedformat=None):
  193. super(FFmpegVideoConvertor, self).__init__(downloader)
  194. self._preferedformat=preferedformat
  195. def run(self, information):
  196. path = information['filepath']
  197. prefix, sep, ext = path.rpartition(u'.')
  198. outpath = prefix + sep + self._preferedformat
  199. if information['ext'] == self._preferedformat:
  200. self._downloader.to_screen(u'[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
  201. return True,information
  202. self._downloader.to_screen(u'['+'ffmpeg'+'] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) +outpath)
  203. self.run_ffmpeg(path, outpath, [])
  204. information['filepath'] = outpath
  205. information['format'] = self._preferedformat
  206. information['ext'] = self._preferedformat
  207. return False,information
  208. class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
  209. # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
  210. _lang_map = {
  211. 'aa': 'aar',
  212. 'ab': 'abk',
  213. 'ae': 'ave',
  214. 'af': 'afr',
  215. 'ak': 'aka',
  216. 'am': 'amh',
  217. 'an': 'arg',
  218. 'ar': 'ara',
  219. 'as': 'asm',
  220. 'av': 'ava',
  221. 'ay': 'aym',
  222. 'az': 'aze',
  223. 'ba': 'bak',
  224. 'be': 'bel',
  225. 'bg': 'bul',
  226. 'bh': 'bih',
  227. 'bi': 'bis',
  228. 'bm': 'bam',
  229. 'bn': 'ben',
  230. 'bo': 'bod',
  231. 'br': 'bre',
  232. 'bs': 'bos',
  233. 'ca': 'cat',
  234. 'ce': 'che',
  235. 'ch': 'cha',
  236. 'co': 'cos',
  237. 'cr': 'cre',
  238. 'cs': 'ces',
  239. 'cu': 'chu',
  240. 'cv': 'chv',
  241. 'cy': 'cym',
  242. 'da': 'dan',
  243. 'de': 'deu',
  244. 'dv': 'div',
  245. 'dz': 'dzo',
  246. 'ee': 'ewe',
  247. 'el': 'ell',
  248. 'en': 'eng',
  249. 'eo': 'epo',
  250. 'es': 'spa',
  251. 'et': 'est',
  252. 'eu': 'eus',
  253. 'fa': 'fas',
  254. 'ff': 'ful',
  255. 'fi': 'fin',
  256. 'fj': 'fij',
  257. 'fo': 'fao',
  258. 'fr': 'fra',
  259. 'fy': 'fry',
  260. 'ga': 'gle',
  261. 'gd': 'gla',
  262. 'gl': 'glg',
  263. 'gn': 'grn',
  264. 'gu': 'guj',
  265. 'gv': 'glv',
  266. 'ha': 'hau',
  267. 'he': 'heb',
  268. 'hi': 'hin',
  269. 'ho': 'hmo',
  270. 'hr': 'hrv',
  271. 'ht': 'hat',
  272. 'hu': 'hun',
  273. 'hy': 'hye',
  274. 'hz': 'her',
  275. 'ia': 'ina',
  276. 'id': 'ind',
  277. 'ie': 'ile',
  278. 'ig': 'ibo',
  279. 'ii': 'iii',
  280. 'ik': 'ipk',
  281. 'io': 'ido',
  282. 'is': 'isl',
  283. 'it': 'ita',
  284. 'iu': 'iku',
  285. 'ja': 'jpn',
  286. 'jv': 'jav',
  287. 'ka': 'kat',
  288. 'kg': 'kon',
  289. 'ki': 'kik',
  290. 'kj': 'kua',
  291. 'kk': 'kaz',
  292. 'kl': 'kal',
  293. 'km': 'khm',
  294. 'kn': 'kan',
  295. 'ko': 'kor',
  296. 'kr': 'kau',
  297. 'ks': 'kas',
  298. 'ku': 'kur',
  299. 'kv': 'kom',
  300. 'kw': 'cor',
  301. 'ky': 'kir',
  302. 'la': 'lat',
  303. 'lb': 'ltz',
  304. 'lg': 'lug',
  305. 'li': 'lim',
  306. 'ln': 'lin',
  307. 'lo': 'lao',
  308. 'lt': 'lit',
  309. 'lu': 'lub',
  310. 'lv': 'lav',
  311. 'mg': 'mlg',
  312. 'mh': 'mah',
  313. 'mi': 'mri',
  314. 'mk': 'mkd',
  315. 'ml': 'mal',
  316. 'mn': 'mon',
  317. 'mr': 'mar',
  318. 'ms': 'msa',
  319. 'mt': 'mlt',
  320. 'my': 'mya',
  321. 'na': 'nau',
  322. 'nb': 'nob',
  323. 'nd': 'nde',
  324. 'ne': 'nep',
  325. 'ng': 'ndo',
  326. 'nl': 'nld',
  327. 'nn': 'nno',
  328. 'no': 'nor',
  329. 'nr': 'nbl',
  330. 'nv': 'nav',
  331. 'ny': 'nya',
  332. 'oc': 'oci',
  333. 'oj': 'oji',
  334. 'om': 'orm',
  335. 'or': 'ori',
  336. 'os': 'oss',
  337. 'pa': 'pan',
  338. 'pi': 'pli',
  339. 'pl': 'pol',
  340. 'ps': 'pus',
  341. 'pt': 'por',
  342. 'qu': 'que',
  343. 'rm': 'roh',
  344. 'rn': 'run',
  345. 'ro': 'ron',
  346. 'ru': 'rus',
  347. 'rw': 'kin',
  348. 'sa': 'san',
  349. 'sc': 'srd',
  350. 'sd': 'snd',
  351. 'se': 'sme',
  352. 'sg': 'sag',
  353. 'si': 'sin',
  354. 'sk': 'slk',
  355. 'sl': 'slv',
  356. 'sm': 'smo',
  357. 'sn': 'sna',
  358. 'so': 'som',
  359. 'sq': 'sqi',
  360. 'sr': 'srp',
  361. 'ss': 'ssw',
  362. 'st': 'sot',
  363. 'su': 'sun',
  364. 'sv': 'swe',
  365. 'sw': 'swa',
  366. 'ta': 'tam',
  367. 'te': 'tel',
  368. 'tg': 'tgk',
  369. 'th': 'tha',
  370. 'ti': 'tir',
  371. 'tk': 'tuk',
  372. 'tl': 'tgl',
  373. 'tn': 'tsn',
  374. 'to': 'ton',
  375. 'tr': 'tur',
  376. 'ts': 'tso',
  377. 'tt': 'tat',
  378. 'tw': 'twi',
  379. 'ty': 'tah',
  380. 'ug': 'uig',
  381. 'uk': 'ukr',
  382. 'ur': 'urd',
  383. 'uz': 'uzb',
  384. 've': 'ven',
  385. 'vi': 'vie',
  386. 'vo': 'vol',
  387. 'wa': 'wln',
  388. 'wo': 'wol',
  389. 'xh': 'xho',
  390. 'yi': 'yid',
  391. 'yo': 'yor',
  392. 'za': 'zha',
  393. 'zh': 'zho',
  394. 'zu': 'zul',
  395. }
  396. def __init__(self, downloader=None, subtitlesformat='srt'):
  397. super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
  398. self._subformat = subtitlesformat
  399. @classmethod
  400. def _conver_lang_code(cls, code):
  401. """Convert language code from ISO 639-1 to ISO 639-2/T"""
  402. return cls._lang_map.get(code[:2])
  403. def run(self, information):
  404. if information['ext'] != u'mp4':
  405. self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
  406. return True, information
  407. sub_langs = [key for key in information['subtitles']]
  408. filename = information['filepath']
  409. input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]
  410. opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
  411. for (i, lang) in enumerate(sub_langs):
  412. opts.extend(['-map', '%d:0' % (i+1), '-c:s:%d' % i, 'mov_text'])
  413. lang_code = self._conver_lang_code(lang)
  414. if lang_code is not None:
  415. opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
  416. opts.extend(['-f', 'mp4'])
  417. temp_filename = filename + u'.temp'
  418. self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename)
  419. self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
  420. os.remove(encodeFilename(filename))
  421. os.rename(encodeFilename(temp_filename), encodeFilename(filename))
  422. return True, information