You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

557 lines
21 KiB

13 years ago
13 years ago
12 years ago
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. from __future__ import with_statement
# People credited as authors of this program.
__authors__ = (
    'Ricardo Garcia Gonzalez',
    'Danny Colligan',
    'Benjamin Johnson',
    'Vasyl\' Vavrychuk',
    'Witold Baryluk',
    'Paweł Paprota',
    'Gergely Imreh',
    'Rogério Brito',
    'Philipp Hagemeister',
    'Sören Schulze',
    'Kevin Ngo',
    'Ori Avtalion',
    'shizeeg',
    'Filippo Valsorda',
)

__license__ = 'Public Domain'
# Also used as the optparse 'version' string and compared by updateSelf()
# against the contents of UPDATE_URL_VERSION.
__version__ = '2012.10.09'

# Download locations used by updateSelf():
# the program file fetched for non-frozen installs
UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
# the latest version string, checked before anything is downloaded
UPDATE_URL_VERSION = 'https://raw.github.com/rg3/youtube-dl/master/LATEST_VERSION'
# the executable fetched when running frozen (sys.frozen set, i.e. py2exe)
UPDATE_URL_EXE = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl.exe'
  25. import cookielib
  26. import getpass
  27. import optparse
  28. import os
  29. import re
  30. import shlex
  31. import socket
  32. import subprocess
  33. import sys
  34. import urllib2
  35. import warnings
  36. from utils import *
  37. from FileDownloader import *
  38. from InfoExtractors import *
  39. from PostProcessor import *
  40. def updateSelf(downloader, filename):
  41. ''' Update the program file with the latest version from the repository '''
  42. # Note: downloader only used for options
  43. if not os.access(filename, os.W_OK):
  44. sys.exit('ERROR: no write permissions on %s' % filename)
  45. downloader.to_screen(u'Updating to latest version...')
  46. urlv = urllib2.urlopen(UPDATE_URL_VERSION)
  47. newversion = urlv.read().strip()
  48. if newversion == __version__:
  49. downloader.to_screen(u'youtube-dl is up-to-date (' + __version__ + ')')
  50. return
  51. urlv.close()
  52. if hasattr(sys, "frozen"): #py2exe
  53. exe = os.path.abspath(filename)
  54. directory = os.path.dirname(exe)
  55. if not os.access(directory, os.W_OK):
  56. sys.exit('ERROR: no write permissions on %s' % directory)
  57. try:
  58. urlh = urllib2.urlopen(UPDATE_URL_EXE)
  59. newcontent = urlh.read()
  60. urlh.close()
  61. with open(exe + '.new', 'wb') as outf:
  62. outf.write(newcontent)
  63. except (IOError, OSError), err:
  64. sys.exit('ERROR: unable to download latest version')
  65. try:
  66. bat = os.path.join(directory, 'youtube-dl-updater.bat')
  67. b = open(bat, 'w')
  68. print >> b, """
  69. echo Updating youtube-dl...
  70. ping 127.0.0.1 -n 5 -w 1000 > NUL
  71. move /Y "%s.new" "%s"
  72. del "%s"
  73. """ %(exe, exe, bat)
  74. b.close()
  75. os.startfile(bat)
  76. except (IOError, OSError), err:
  77. sys.exit('ERROR: unable to overwrite current version')
  78. else:
  79. try:
  80. urlh = urllib2.urlopen(UPDATE_URL)
  81. newcontent = urlh.read()
  82. urlh.close()
  83. except (IOError, OSError), err:
  84. sys.exit('ERROR: unable to download latest version')
  85. try:
  86. with open(filename, 'wb') as outf:
  87. outf.write(newcontent)
  88. except (IOError, OSError), err:
  89. sys.exit('ERROR: unable to overwrite current version')
  90. downloader.to_screen(u'Updated youtube-dl. Restart youtube-dl to use the new version.')
  91. def parseOpts():
  92. def _readOptions(filename_bytes):
  93. try:
  94. optionf = open(filename_bytes)
  95. except IOError:
  96. return [] # silently skip if file is not present
  97. try:
  98. res = []
  99. for l in optionf:
  100. res += shlex.split(l, comments=True)
  101. finally:
  102. optionf.close()
  103. return res
  104. def _format_option_string(option):
  105. ''' ('-o', '--option') -> -o, --format METAVAR'''
  106. opts = []
  107. if option._short_opts: opts.append(option._short_opts[0])
  108. if option._long_opts: opts.append(option._long_opts[0])
  109. if len(opts) > 1: opts.insert(1, ', ')
  110. if option.takes_value(): opts.append(' %s' % option.metavar)
  111. return "".join(opts)
  112. def _find_term_columns():
  113. columns = os.environ.get('COLUMNS', None)
  114. if columns:
  115. return int(columns)
  116. try:
  117. sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  118. out,err = sp.communicate()
  119. return int(out.split()[1])
  120. except:
  121. pass
  122. return None
  123. max_width = 80
  124. max_help_position = 80
  125. # No need to wrap help messages if we're on a wide console
  126. columns = _find_term_columns()
  127. if columns: max_width = columns
  128. fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
  129. fmt.format_option_strings = _format_option_string
  130. kw = {
  131. 'version' : __version__,
  132. 'formatter' : fmt,
  133. 'usage' : '%prog [options] url [url...]',
  134. 'conflict_handler' : 'resolve',
  135. }
  136. parser = optparse.OptionParser(**kw)
  137. # option groups
  138. general = optparse.OptionGroup(parser, 'General Options')
  139. selection = optparse.OptionGroup(parser, 'Video Selection')
  140. authentication = optparse.OptionGroup(parser, 'Authentication Options')
  141. video_format = optparse.OptionGroup(parser, 'Video Format Options')
  142. postproc = optparse.OptionGroup(parser, 'Post-processing Options')
  143. filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
  144. verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
  145. general.add_option('-h', '--help',
  146. action='help', help='print this help text and exit')
  147. general.add_option('-v', '--version',
  148. action='version', help='print program version and exit')
  149. general.add_option('-U', '--update',
  150. action='store_true', dest='update_self', help='update this program to latest version')
  151. general.add_option('-i', '--ignore-errors',
  152. action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
  153. general.add_option('-r', '--rate-limit',
  154. dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
  155. general.add_option('-R', '--retries',
  156. dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
  157. general.add_option('--dump-user-agent',
  158. action='store_true', dest='dump_user_agent',
  159. help='display the current browser identification', default=False)
  160. general.add_option('--user-agent',
  161. dest='user_agent', help='specify a custom user agent', metavar='UA')
  162. general.add_option('--list-extractors',
  163. action='store_true', dest='list_extractors',
  164. help='List all supported extractors and the URLs they would handle', default=False)
  165. selection.add_option('--playlist-start',
  166. dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is %default)', default=1)
  167. selection.add_option('--playlist-end',
  168. dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
  169. selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
  170. selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
  171. selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)
  172. authentication.add_option('-u', '--username',
  173. dest='username', metavar='USERNAME', help='account username')
  174. authentication.add_option('-p', '--password',
  175. dest='password', metavar='PASSWORD', help='account password')
  176. authentication.add_option('-n', '--netrc',
  177. action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
  178. video_format.add_option('-f', '--format',
  179. action='store', dest='format', metavar='FORMAT', help='video format code')
  180. video_format.add_option('--all-formats',
  181. action='store_const', dest='format', help='download all available video formats', const='all')
  182. video_format.add_option('--prefer-free-formats',
  183. action='store_true', dest='prefer_free_formats', default=False, help='prefer free video formats unless a specific one is requested')
  184. video_format.add_option('--max-quality',
  185. action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
  186. video_format.add_option('-F', '--list-formats',
  187. action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
  188. video_format.add_option('--write-srt',
  189. action='store_true', dest='writesubtitles',
  190. help='write video closed captions to a .srt file (currently youtube only)', default=False)
  191. video_format.add_option('--srt-lang',
  192. action='store', dest='subtitleslang', metavar='LANG',
  193. help='language of the closed captions to download (optional) use IETF language tags like \'en\'')
  194. verbosity.add_option('-q', '--quiet',
  195. action='store_true', dest='quiet', help='activates quiet mode', default=False)
  196. verbosity.add_option('-s', '--simulate',
  197. action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
  198. verbosity.add_option('--skip-download',
  199. action='store_true', dest='skip_download', help='do not download the video', default=False)
  200. verbosity.add_option('-g', '--get-url',
  201. action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
  202. verbosity.add_option('-e', '--get-title',
  203. action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
  204. verbosity.add_option('--get-thumbnail',
  205. action='store_true', dest='getthumbnail',
  206. help='simulate, quiet but print thumbnail URL', default=False)
  207. verbosity.add_option('--get-description',
  208. action='store_true', dest='getdescription',
  209. help='simulate, quiet but print video description', default=False)
  210. verbosity.add_option('--get-filename',
  211. action='store_true', dest='getfilename',
  212. help='simulate, quiet but print output filename', default=False)
  213. verbosity.add_option('--get-format',
  214. action='store_true', dest='getformat',
  215. help='simulate, quiet but print output format', default=False)
  216. verbosity.add_option('--no-progress',
  217. action='store_true', dest='noprogress', help='do not print progress bar', default=False)
  218. verbosity.add_option('--console-title',
  219. action='store_true', dest='consoletitle',
  220. help='display progress in console titlebar', default=False)
  221. verbosity.add_option('-v', '--verbose',
  222. action='store_true', dest='verbose', help='print various debugging information', default=False)
  223. filesystem.add_option('-t', '--title',
  224. action='store_true', dest='usetitle', help='use title in file name', default=False)
  225. filesystem.add_option('-l', '--literal',
  226. action='store_true', dest='useliteral', help='use literal title in file name', default=False)
  227. filesystem.add_option('-A', '--auto-number',
  228. action='store_true', dest='autonumber',
  229. help='number downloaded files starting from 00000', default=False)
  230. filesystem.add_option('-o', '--output',
  231. dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.')
  232. filesystem.add_option('-a', '--batch-file',
  233. dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
  234. filesystem.add_option('-w', '--no-overwrites',
  235. action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
  236. filesystem.add_option('-c', '--continue',
  237. action='store_true', dest='continue_dl', help='resume partially downloaded files', default=True)
  238. filesystem.add_option('--no-continue',
  239. action='store_false', dest='continue_dl',
  240. help='do not resume partially downloaded files (restart from beginning)')
  241. filesystem.add_option('--cookies',
  242. dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
  243. filesystem.add_option('--no-part',
  244. action='store_true', dest='nopart', help='do not use .part files', default=False)
  245. filesystem.add_option('--no-mtime',
  246. action='store_false', dest='updatetime',
  247. help='do not use the Last-modified header to set the file modification time', default=True)
  248. filesystem.add_option('--write-description',
  249. action='store_true', dest='writedescription',
  250. help='write video description to a .description file', default=False)
  251. filesystem.add_option('--write-info-json',
  252. action='store_true', dest='writeinfojson',
  253. help='write video metadata to a .info.json file', default=False)
  254. postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
  255. help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
  256. postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
  257. help='"best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default')
  258. postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5',
  259. help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)')
  260. postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
  261. help='keeps the video file on disk after the post-processing; the video is erased by default')
  262. parser.add_option_group(general)
  263. parser.add_option_group(selection)
  264. parser.add_option_group(filesystem)
  265. parser.add_option_group(verbosity)
  266. parser.add_option_group(video_format)
  267. parser.add_option_group(authentication)
  268. parser.add_option_group(postproc)
  269. xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
  270. if xdg_config_home:
  271. userConf = os.path.join(xdg_config_home, 'youtube-dl.conf')
  272. else:
  273. userConf = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
  274. argv = _readOptions('/etc/youtube-dl.conf') + _readOptions(userConf) + sys.argv[1:]
  275. opts, args = parser.parse_args(argv)
  276. return parser, opts, args
  277. def gen_extractors():
  278. """ Return a list of an instance of every supported extractor.
  279. The order does matter; the first extractor matched is the one handling the URL.
  280. """
  281. return [
  282. YoutubePlaylistIE(),
  283. YoutubeChannelIE(),
  284. YoutubeUserIE(),
  285. YoutubeSearchIE(),
  286. YoutubeIE(),
  287. MetacafeIE(),
  288. DailymotionIE(),
  289. GoogleIE(),
  290. GoogleSearchIE(),
  291. PhotobucketIE(),
  292. YahooIE(),
  293. YahooSearchIE(),
  294. DepositFilesIE(),
  295. FacebookIE(),
  296. BlipTVUserIE(),
  297. BlipTVIE(),
  298. VimeoIE(),
  299. MyVideoIE(),
  300. ComedyCentralIE(),
  301. EscapistIE(),
  302. CollegeHumorIE(),
  303. XVideosIE(),
  304. SoundcloudIE(),
  305. InfoQIE(),
  306. MixcloudIE(),
  307. StanfordOpenClassroomIE(),
  308. MTVIE(),
  309. YoukuIE(),
  310. XNXXIE(),
  311. GooglePlusIE(),
  312. GenericIE()
  313. ]
  314. def _real_main():
  315. parser, opts, args = parseOpts()
  316. # Open appropriate CookieJar
  317. if opts.cookiefile is None:
  318. jar = cookielib.CookieJar()
  319. else:
  320. try:
  321. jar = cookielib.MozillaCookieJar(opts.cookiefile)
  322. if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
  323. jar.load()
  324. except (IOError, OSError), err:
  325. sys.exit(u'ERROR: unable to open cookie file')
  326. # Set user agent
  327. if opts.user_agent is not None:
  328. std_headers['User-Agent'] = opts.user_agent
  329. # Dump user agent
  330. if opts.dump_user_agent:
  331. print std_headers['User-Agent']
  332. sys.exit(0)
  333. # Batch file verification
  334. batchurls = []
  335. if opts.batchfile is not None:
  336. try:
  337. if opts.batchfile == '-':
  338. batchfd = sys.stdin
  339. else:
  340. batchfd = open(opts.batchfile, 'r')
  341. batchurls = batchfd.readlines()
  342. batchurls = [x.strip() for x in batchurls]
  343. batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
  344. except IOError:
  345. sys.exit(u'ERROR: batch file could not be read')
  346. all_urls = batchurls + args
  347. all_urls = map(lambda url: url.strip(), all_urls)
  348. # General configuration
  349. cookie_processor = urllib2.HTTPCookieProcessor(jar)
  350. proxy_handler = urllib2.ProxyHandler()
  351. opener = urllib2.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
  352. urllib2.install_opener(opener)
  353. socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
  354. extractors = gen_extractors()
  355. if opts.list_extractors:
  356. for ie in extractors:
  357. print(ie.IE_NAME)
  358. matchedUrls = filter(lambda url: ie.suitable(url), all_urls)
  359. all_urls = filter(lambda url: url not in matchedUrls, all_urls)
  360. for mu in matchedUrls:
  361. print(u' ' + mu)
  362. sys.exit(0)
  363. # Conflicting, missing and erroneous options
  364. if opts.usenetrc and (opts.username is not None or opts.password is not None):
  365. parser.error(u'using .netrc conflicts with giving username/password')
  366. if opts.password is not None and opts.username is None:
  367. parser.error(u'account username missing')
  368. if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
  369. parser.error(u'using output template conflicts with using title, literal title or auto number')
  370. if opts.usetitle and opts.useliteral:
  371. parser.error(u'using title conflicts with using literal title')
  372. if opts.username is not None and opts.password is None:
  373. opts.password = getpass.getpass(u'Type account password and press return:')
  374. if opts.ratelimit is not None:
  375. numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
  376. if numeric_limit is None:
  377. parser.error(u'invalid rate limit specified')
  378. opts.ratelimit = numeric_limit
  379. if opts.retries is not None:
  380. try:
  381. opts.retries = long(opts.retries)
  382. except (TypeError, ValueError), err:
  383. parser.error(u'invalid retry count specified')
  384. try:
  385. opts.playliststart = int(opts.playliststart)
  386. if opts.playliststart <= 0:
  387. raise ValueError(u'Playlist start must be positive')
  388. except (TypeError, ValueError), err:
  389. parser.error(u'invalid playlist start number specified')
  390. try:
  391. opts.playlistend = int(opts.playlistend)
  392. if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
  393. raise ValueError(u'Playlist end must be greater than playlist start')
  394. except (TypeError, ValueError), err:
  395. parser.error(u'invalid playlist end number specified')
  396. if opts.extractaudio:
  397. if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis', 'm4a', 'wav']:
  398. parser.error(u'invalid audio format specified')
  399. if opts.audioquality:
  400. opts.audioquality = opts.audioquality.strip('k').strip('K')
  401. if not opts.audioquality.isdigit():
  402. parser.error(u'invalid audio quality specified')
  403. # File downloader
  404. fd = FileDownloader({
  405. 'usenetrc': opts.usenetrc,
  406. 'username': opts.username,
  407. 'password': opts.password,
  408. 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
  409. 'forceurl': opts.geturl,
  410. 'forcetitle': opts.gettitle,
  411. 'forcethumbnail': opts.getthumbnail,
  412. 'forcedescription': opts.getdescription,
  413. 'forcefilename': opts.getfilename,
  414. 'forceformat': opts.getformat,
  415. 'simulate': opts.simulate,
  416. 'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
  417. 'format': opts.format,
  418. 'format_limit': opts.format_limit,
  419. 'listformats': opts.listformats,
  420. 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
  421. or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
  422. or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
  423. or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
  424. or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
  425. or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
  426. or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
  427. or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
  428. or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
  429. or u'%(id)s.%(ext)s'),
  430. 'ignoreerrors': opts.ignoreerrors,
  431. 'ratelimit': opts.ratelimit,
  432. 'nooverwrites': opts.nooverwrites,
  433. 'retries': opts.retries,
  434. 'continuedl': opts.continue_dl,
  435. 'noprogress': opts.noprogress,
  436. 'playliststart': opts.playliststart,
  437. 'playlistend': opts.playlistend,
  438. 'logtostderr': opts.outtmpl == '-',
  439. 'consoletitle': opts.consoletitle,
  440. 'nopart': opts.nopart,
  441. 'updatetime': opts.updatetime,
  442. 'writedescription': opts.writedescription,
  443. 'writeinfojson': opts.writeinfojson,
  444. 'writesubtitles': opts.writesubtitles,
  445. 'subtitleslang': opts.subtitleslang,
  446. 'matchtitle': opts.matchtitle,
  447. 'rejecttitle': opts.rejecttitle,
  448. 'max_downloads': opts.max_downloads,
  449. 'prefer_free_formats': opts.prefer_free_formats,
  450. 'verbose': opts.verbose,
  451. })
  452. if opts.verbose:
  453. fd.to_screen(u'[debug] Proxy map: ' + str(proxy_handler.proxies))
  454. for extractor in extractors:
  455. fd.add_info_extractor(extractor)
  456. # PostProcessors
  457. if opts.extractaudio:
  458. fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo))
  459. # Update version
  460. if opts.update_self:
  461. updateSelf(fd, sys.argv[0])
  462. # Maybe do nothing
  463. if len(all_urls) < 1:
  464. if not opts.update_self:
  465. parser.error(u'you must provide at least one URL')
  466. else:
  467. sys.exit()
  468. try:
  469. retcode = fd.download(all_urls)
  470. except MaxDownloadsReached:
  471. fd.to_screen(u'--max-download limit reached, aborting.')
  472. retcode = 101
  473. # Dump cookie jar if requested
  474. if opts.cookiefile is not None:
  475. try:
  476. jar.save()
  477. except (IOError, OSError), err:
  478. sys.exit(u'ERROR: unable to save cookie jar')
  479. sys.exit(retcode)
  480. def main():
  481. try:
  482. _real_main()
  483. except DownloadError:
  484. sys.exit(1)
  485. except SameFileError:
  486. sys.exit(u'ERROR: fixed output name but more than one file to download')
  487. except KeyboardInterrupt:
  488. sys.exit(u'\nERROR: Interrupted by user')