You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

553 lines
21 KiB

13 years ago
13 years ago
12 years ago
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. __authors__ = (
  4. 'Ricardo Garcia Gonzalez',
  5. 'Danny Colligan',
  6. 'Benjamin Johnson',
  7. 'Vasyl\' Vavrychuk',
  8. 'Witold Baryluk',
  9. 'Paweł Paprota',
  10. 'Gergely Imreh',
  11. 'Rogério Brito',
  12. 'Philipp Hagemeister',
  13. 'Sören Schulze',
  14. 'Kevin Ngo',
  15. 'Ori Avtalion',
  16. 'shizeeg',
  17. 'Filippo Valsorda',
  18. )
  19. __license__ = 'Public Domain'
  20. __version__ = '2012.09.27'
  21. UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
  22. UPDATE_URL_VERSION = 'https://raw.github.com/rg3/youtube-dl/master/LATEST_VERSION'
  23. UPDATE_URL_EXE = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl.exe'
  24. import cookielib
  25. import getpass
  26. import optparse
  27. import os
  28. import re
  29. import shlex
  30. import socket
  31. import subprocess
  32. import sys
  33. import urllib2
  34. import warnings
  35. from utils import *
  36. from FileDownloader import *
  37. from InfoExtractors import *
  38. from PostProcessor import *
  39. def updateSelf(downloader, filename):
  40. ''' Update the program file with the latest version from the repository '''
  41. # Note: downloader only used for options
  42. if not os.access(filename, os.W_OK):
  43. sys.exit('ERROR: no write permissions on %s' % filename)
  44. downloader.to_screen(u'Updating to latest version...')
  45. urlv = urllib2.urlopen(UPDATE_URL_VERSION)
  46. newversion = urlv.read().strip()
  47. if newversion == __version__:
  48. downloader.to_screen(u'youtube-dl is up-to-date (' + __version__ + ')')
  49. return
  50. urlv.close()
  51. if hasattr(sys, "frozen"): #py2exe
  52. exe = os.path.abspath(filename)
  53. directory = os.path.dirname(exe)
  54. if not os.access(directory, os.W_OK):
  55. sys.exit('ERROR: no write permissions on %s' % directory)
  56. try:
  57. urlh = urllib2.urlopen(UPDATE_URL_EXE)
  58. newcontent = urlh.read()
  59. urlh.close()
  60. with open(exe + '.new', 'wb') as outf:
  61. outf.write(newcontent)
  62. except (IOError, OSError), err:
  63. sys.exit('ERROR: unable to download latest version')
  64. try:
  65. bat = os.path.join(directory, 'youtube-dl-updater.bat')
  66. b = open(bat, 'w')
  67. print >> b, """
  68. echo Updating youtube-dl...
  69. ping 127.0.0.1 -n 5 -w 1000 > NUL
  70. move /Y "%s.new" "%s"
  71. del "%s"
  72. """ %(exe, exe, bat)
  73. b.close()
  74. os.startfile(bat)
  75. except (IOError, OSError), err:
  76. sys.exit('ERROR: unable to overwrite current version')
  77. else:
  78. try:
  79. urlh = urllib2.urlopen(UPDATE_URL)
  80. newcontent = urlh.read()
  81. urlh.close()
  82. except (IOError, OSError), err:
  83. sys.exit('ERROR: unable to download latest version')
  84. try:
  85. with open(filename, 'wb') as outf:
  86. outf.write(newcontent)
  87. except (IOError, OSError), err:
  88. sys.exit('ERROR: unable to overwrite current version')
  89. downloader.to_screen(u'Updated youtube-dl. Restart youtube-dl to use the new version.')
  90. def parseOpts():
  91. def _readOptions(filename_bytes):
  92. try:
  93. optionf = open(filename_bytes)
  94. except IOError:
  95. return [] # silently skip if file is not present
  96. try:
  97. res = []
  98. for l in optionf:
  99. res += shlex.split(l, comments=True)
  100. finally:
  101. optionf.close()
  102. return res
  103. def _format_option_string(option):
  104. ''' ('-o', '--option') -> -o, --format METAVAR'''
  105. opts = []
  106. if option._short_opts: opts.append(option._short_opts[0])
  107. if option._long_opts: opts.append(option._long_opts[0])
  108. if len(opts) > 1: opts.insert(1, ', ')
  109. if option.takes_value(): opts.append(' %s' % option.metavar)
  110. return "".join(opts)
  111. def _find_term_columns():
  112. columns = os.environ.get('COLUMNS', None)
  113. if columns:
  114. return int(columns)
  115. try:
  116. sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  117. out,err = sp.communicate()
  118. return int(out.split()[1])
  119. except:
  120. pass
  121. return None
  122. max_width = 80
  123. max_help_position = 80
  124. # No need to wrap help messages if we're on a wide console
  125. columns = _find_term_columns()
  126. if columns: max_width = columns
  127. fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
  128. fmt.format_option_strings = _format_option_string
  129. kw = {
  130. 'version' : __version__,
  131. 'formatter' : fmt,
  132. 'usage' : '%prog [options] url [url...]',
  133. 'conflict_handler' : 'resolve',
  134. }
  135. parser = optparse.OptionParser(**kw)
  136. # option groups
  137. general = optparse.OptionGroup(parser, 'General Options')
  138. selection = optparse.OptionGroup(parser, 'Video Selection')
  139. authentication = optparse.OptionGroup(parser, 'Authentication Options')
  140. video_format = optparse.OptionGroup(parser, 'Video Format Options')
  141. postproc = optparse.OptionGroup(parser, 'Post-processing Options')
  142. filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
  143. verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
  144. general.add_option('-h', '--help',
  145. action='help', help='print this help text and exit')
  146. general.add_option('-v', '--version',
  147. action='version', help='print program version and exit')
  148. general.add_option('-U', '--update',
  149. action='store_true', dest='update_self', help='update this program to latest version')
  150. general.add_option('-i', '--ignore-errors',
  151. action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
  152. general.add_option('-r', '--rate-limit',
  153. dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
  154. general.add_option('-R', '--retries',
  155. dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
  156. general.add_option('--dump-user-agent',
  157. action='store_true', dest='dump_user_agent',
  158. help='display the current browser identification', default=False)
  159. general.add_option('--user-agent',
  160. dest='user_agent', help='specify a custom user agent', metavar='UA')
  161. general.add_option('--list-extractors',
  162. action='store_true', dest='list_extractors',
  163. help='List all supported extractors and the URLs they would handle', default=False)
  164. selection.add_option('--playlist-start',
  165. dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is %default)', default=1)
  166. selection.add_option('--playlist-end',
  167. dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
  168. selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
  169. selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
  170. selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)
  171. authentication.add_option('-u', '--username',
  172. dest='username', metavar='USERNAME', help='account username')
  173. authentication.add_option('-p', '--password',
  174. dest='password', metavar='PASSWORD', help='account password')
  175. authentication.add_option('-n', '--netrc',
  176. action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
  177. video_format.add_option('-f', '--format',
  178. action='store', dest='format', metavar='FORMAT', help='video format code')
  179. video_format.add_option('--all-formats',
  180. action='store_const', dest='format', help='download all available video formats', const='all')
  181. video_format.add_option('--prefer-free-formats',
  182. action='store_true', dest='prefer_free_formats', default=False, help='prefer free video formats unless a specific one is requested')
  183. video_format.add_option('--max-quality',
  184. action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
  185. video_format.add_option('-F', '--list-formats',
  186. action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
  187. video_format.add_option('--write-srt',
  188. action='store_true', dest='writesubtitles',
  189. help='write video closed captions to a .srt file (currently youtube only)', default=False)
  190. video_format.add_option('--srt-lang',
  191. action='store', dest='subtitleslang', metavar='LANG',
  192. help='language of the closed captions to download (optional) use IETF language tags like \'en\'')
  193. verbosity.add_option('-q', '--quiet',
  194. action='store_true', dest='quiet', help='activates quiet mode', default=False)
  195. verbosity.add_option('-s', '--simulate',
  196. action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
  197. verbosity.add_option('--skip-download',
  198. action='store_true', dest='skip_download', help='do not download the video', default=False)
  199. verbosity.add_option('-g', '--get-url',
  200. action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
  201. verbosity.add_option('-e', '--get-title',
  202. action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
  203. verbosity.add_option('--get-thumbnail',
  204. action='store_true', dest='getthumbnail',
  205. help='simulate, quiet but print thumbnail URL', default=False)
  206. verbosity.add_option('--get-description',
  207. action='store_true', dest='getdescription',
  208. help='simulate, quiet but print video description', default=False)
  209. verbosity.add_option('--get-filename',
  210. action='store_true', dest='getfilename',
  211. help='simulate, quiet but print output filename', default=False)
  212. verbosity.add_option('--get-format',
  213. action='store_true', dest='getformat',
  214. help='simulate, quiet but print output format', default=False)
  215. verbosity.add_option('--no-progress',
  216. action='store_true', dest='noprogress', help='do not print progress bar', default=False)
  217. verbosity.add_option('--console-title',
  218. action='store_true', dest='consoletitle',
  219. help='display progress in console titlebar', default=False)
  220. verbosity.add_option('-v', '--verbose',
  221. action='store_true', dest='verbose', help='print various debugging information', default=False)
  222. filesystem.add_option('-t', '--title',
  223. action='store_true', dest='usetitle', help='use title in file name', default=False)
  224. filesystem.add_option('-l', '--literal',
  225. action='store_true', dest='useliteral', help='use literal title in file name', default=False)
  226. filesystem.add_option('-A', '--auto-number',
  227. action='store_true', dest='autonumber',
  228. help='number downloaded files starting from 00000', default=False)
  229. filesystem.add_option('-o', '--output',
  230. dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), and %% for a literal percent. Use - to output to stdout.')
  231. filesystem.add_option('-a', '--batch-file',
  232. dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
  233. filesystem.add_option('-w', '--no-overwrites',
  234. action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
  235. filesystem.add_option('-c', '--continue',
  236. action='store_true', dest='continue_dl', help='resume partially downloaded files', default=True)
  237. filesystem.add_option('--no-continue',
  238. action='store_false', dest='continue_dl',
  239. help='do not resume partially downloaded files (restart from beginning)')
  240. filesystem.add_option('--cookies',
  241. dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
  242. filesystem.add_option('--no-part',
  243. action='store_true', dest='nopart', help='do not use .part files', default=False)
  244. filesystem.add_option('--no-mtime',
  245. action='store_false', dest='updatetime',
  246. help='do not use the Last-modified header to set the file modification time', default=True)
  247. filesystem.add_option('--write-description',
  248. action='store_true', dest='writedescription',
  249. help='write video description to a .description file', default=False)
  250. filesystem.add_option('--write-info-json',
  251. action='store_true', dest='writeinfojson',
  252. help='write video metadata to a .info.json file', default=False)
  253. postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
  254. help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
  255. postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
  256. help='"best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default')
  257. postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5',
  258. help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)')
  259. postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
  260. help='keeps the video file on disk after the post-processing; the video is erased by default')
  261. parser.add_option_group(general)
  262. parser.add_option_group(selection)
  263. parser.add_option_group(filesystem)
  264. parser.add_option_group(verbosity)
  265. parser.add_option_group(video_format)
  266. parser.add_option_group(authentication)
  267. parser.add_option_group(postproc)
  268. xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
  269. if xdg_config_home:
  270. userConf = os.path.join(xdg_config_home, 'youtube-dl.conf')
  271. else:
  272. userConf = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
  273. argv = _readOptions('/etc/youtube-dl.conf') + _readOptions(userConf) + sys.argv[1:]
  274. opts, args = parser.parse_args(argv)
  275. return parser, opts, args
  276. def gen_extractors():
  277. """ Return a list of an instance of every supported extractor.
  278. The order does matter; the first extractor matched is the one handling the URL.
  279. """
  280. return [
  281. YoutubePlaylistIE(),
  282. YoutubeUserIE(),
  283. YoutubeSearchIE(),
  284. YoutubeIE(),
  285. MetacafeIE(),
  286. DailymotionIE(),
  287. GoogleIE(),
  288. GoogleSearchIE(),
  289. PhotobucketIE(),
  290. YahooIE(),
  291. YahooSearchIE(),
  292. DepositFilesIE(),
  293. FacebookIE(),
  294. BlipTVUserIE(),
  295. BlipTVIE(),
  296. VimeoIE(),
  297. MyVideoIE(),
  298. ComedyCentralIE(),
  299. EscapistIE(),
  300. CollegeHumorIE(),
  301. XVideosIE(),
  302. SoundcloudIE(),
  303. InfoQIE(),
  304. MixcloudIE(),
  305. StanfordOpenClassroomIE(),
  306. MTVIE(),
  307. YoukuIE(),
  308. XNXXIE(),
  309. GenericIE()
  310. ]
  311. def _real_main():
  312. parser, opts, args = parseOpts()
  313. # Open appropriate CookieJar
  314. if opts.cookiefile is None:
  315. jar = cookielib.CookieJar()
  316. else:
  317. try:
  318. jar = cookielib.MozillaCookieJar(opts.cookiefile)
  319. if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
  320. jar.load()
  321. except (IOError, OSError), err:
  322. sys.exit(u'ERROR: unable to open cookie file')
  323. # Set user agent
  324. if opts.user_agent is not None:
  325. std_headers['User-Agent'] = opts.user_agent
  326. # Dump user agent
  327. if opts.dump_user_agent:
  328. print std_headers['User-Agent']
  329. sys.exit(0)
  330. # Batch file verification
  331. batchurls = []
  332. if opts.batchfile is not None:
  333. try:
  334. if opts.batchfile == '-':
  335. batchfd = sys.stdin
  336. else:
  337. batchfd = open(opts.batchfile, 'r')
  338. batchurls = batchfd.readlines()
  339. batchurls = [x.strip() for x in batchurls]
  340. batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
  341. except IOError:
  342. sys.exit(u'ERROR: batch file could not be read')
  343. all_urls = batchurls + args
  344. all_urls = map(lambda url: url.strip(), all_urls)
  345. # General configuration
  346. cookie_processor = urllib2.HTTPCookieProcessor(jar)
  347. proxy_handler = urllib2.ProxyHandler()
  348. opener = urllib2.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
  349. urllib2.install_opener(opener)
  350. socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
  351. extractors = gen_extractors()
  352. if opts.list_extractors:
  353. for ie in extractors:
  354. print(ie.IE_NAME)
  355. matchedUrls = filter(lambda url: ie.suitable(url), all_urls)
  356. all_urls = filter(lambda url: url not in matchedUrls, all_urls)
  357. for mu in matchedUrls:
  358. print(u' ' + mu)
  359. sys.exit(0)
  360. # Conflicting, missing and erroneous options
  361. if opts.usenetrc and (opts.username is not None or opts.password is not None):
  362. parser.error(u'using .netrc conflicts with giving username/password')
  363. if opts.password is not None and opts.username is None:
  364. parser.error(u'account username missing')
  365. if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
  366. parser.error(u'using output template conflicts with using title, literal title or auto number')
  367. if opts.usetitle and opts.useliteral:
  368. parser.error(u'using title conflicts with using literal title')
  369. if opts.username is not None and opts.password is None:
  370. opts.password = getpass.getpass(u'Type account password and press return:')
  371. if opts.ratelimit is not None:
  372. numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
  373. if numeric_limit is None:
  374. parser.error(u'invalid rate limit specified')
  375. opts.ratelimit = numeric_limit
  376. if opts.retries is not None:
  377. try:
  378. opts.retries = long(opts.retries)
  379. except (TypeError, ValueError), err:
  380. parser.error(u'invalid retry count specified')
  381. try:
  382. opts.playliststart = int(opts.playliststart)
  383. if opts.playliststart <= 0:
  384. raise ValueError(u'Playlist start must be positive')
  385. except (TypeError, ValueError), err:
  386. parser.error(u'invalid playlist start number specified')
  387. try:
  388. opts.playlistend = int(opts.playlistend)
  389. if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
  390. raise ValueError(u'Playlist end must be greater than playlist start')
  391. except (TypeError, ValueError), err:
  392. parser.error(u'invalid playlist end number specified')
  393. if opts.extractaudio:
  394. if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis', 'm4a', 'wav']:
  395. parser.error(u'invalid audio format specified')
  396. if opts.audioquality:
  397. opts.audioquality = opts.audioquality.strip('k').strip('K')
  398. if not opts.audioquality.isdigit():
  399. parser.error(u'invalid audio quality specified')
  400. # File downloader
  401. fd = FileDownloader({
  402. 'usenetrc': opts.usenetrc,
  403. 'username': opts.username,
  404. 'password': opts.password,
  405. 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
  406. 'forceurl': opts.geturl,
  407. 'forcetitle': opts.gettitle,
  408. 'forcethumbnail': opts.getthumbnail,
  409. 'forcedescription': opts.getdescription,
  410. 'forcefilename': opts.getfilename,
  411. 'forceformat': opts.getformat,
  412. 'simulate': opts.simulate,
  413. 'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
  414. 'format': opts.format,
  415. 'format_limit': opts.format_limit,
  416. 'listformats': opts.listformats,
  417. 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
  418. or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
  419. or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
  420. or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
  421. or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
  422. or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
  423. or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
  424. or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
  425. or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
  426. or u'%(id)s.%(ext)s'),
  427. 'ignoreerrors': opts.ignoreerrors,
  428. 'ratelimit': opts.ratelimit,
  429. 'nooverwrites': opts.nooverwrites,
  430. 'retries': opts.retries,
  431. 'continuedl': opts.continue_dl,
  432. 'noprogress': opts.noprogress,
  433. 'playliststart': opts.playliststart,
  434. 'playlistend': opts.playlistend,
  435. 'logtostderr': opts.outtmpl == '-',
  436. 'consoletitle': opts.consoletitle,
  437. 'nopart': opts.nopart,
  438. 'updatetime': opts.updatetime,
  439. 'writedescription': opts.writedescription,
  440. 'writeinfojson': opts.writeinfojson,
  441. 'writesubtitles': opts.writesubtitles,
  442. 'subtitleslang': opts.subtitleslang,
  443. 'matchtitle': opts.matchtitle,
  444. 'rejecttitle': opts.rejecttitle,
  445. 'max_downloads': opts.max_downloads,
  446. 'prefer_free_formats': opts.prefer_free_formats,
  447. 'verbose': opts.verbose,
  448. })
  449. if opts.verbose:
  450. fd.to_screen(u'[debug] Proxy map: ' + str(proxy_handler.proxies))
  451. for extractor in extractors:
  452. fd.add_info_extractor(extractor)
  453. # PostProcessors
  454. if opts.extractaudio:
  455. fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo))
  456. # Update version
  457. if opts.update_self:
  458. updateSelf(fd, sys.argv[0])
  459. # Maybe do nothing
  460. if len(all_urls) < 1:
  461. if not opts.update_self:
  462. parser.error(u'you must provide at least one URL')
  463. else:
  464. sys.exit()
  465. try:
  466. retcode = fd.download(all_urls)
  467. except MaxDownloadsReached:
  468. fd.to_screen(u'--max-download limit reached, aborting.')
  469. retcode = 101
  470. # Dump cookie jar if requested
  471. if opts.cookiefile is not None:
  472. try:
  473. jar.save()
  474. except (IOError, OSError), err:
  475. sys.exit(u'ERROR: unable to save cookie jar')
  476. sys.exit(retcode)
  477. def main():
  478. try:
  479. _real_main()
  480. except DownloadError:
  481. sys.exit(1)
  482. except SameFileError:
  483. sys.exit(u'ERROR: fixed output name but more than one file to download')
  484. except KeyboardInterrupt:
  485. sys.exit(u'\nERROR: Interrupted by user')