Merge branch 'master' into subtitles_rework

11 years ago · 72836fcee4
--- a/README.md
+++ b/README.md
@ -113,25 +113,28 @@ which means you can modify it, redistribute it or use it however you like.
 ## Video Format Options:
    -f, --format FORMAT        video format code, specify the order of
                               preference using slashes: "-f 22/17/18"
                               preference using slashes: "-f 22/17/18". "-f mp4"
                               and "-f flv" are also supported
    --all-formats              download all available video formats
    --prefer-free-formats      prefer free video formats unless a specific one
                               is requested
    --max-quality FORMAT       highest quality format to download
    -F, --list-formats         list all available formats (currently youtube
                               only)
 ## Subtitle Options:
    --write-sub                write subtitle file (currently youtube only)
    --write-auto-sub           write automatic subtitle file (currently youtube
                               only)
    --only-sub                 [deprecated] alias of --skip-download
    --all-subs                 downloads all the available subtitles of the
                               video (currently youtube only)
                               video
    --list-subs                lists all available subtitles for the video
                               (currently youtube only)
    --sub-format FORMAT        subtitle format [srt/sbv/vtt] (default=srt)
                               (currently youtube only)
    --sub-lang LANG            language of the subtitles to download (optional)
                               use IETF language tags like 'en'
    --sub-format FORMAT        subtitle format (default=srt) ([sbv/vtt] youtube
                               only)
    --sub-lang LANGS           languages of the subtitles to download (optional)
                               separated by commas, use IETF language tags like
                               'en,pt'
 ## Authentication Options:
    -u, --username USERNAME    account username
@ -153,6 +156,8 @@ which means you can modify it, redistribute it or use it however you like.
                               processing; the video is erased by default
    --no-post-overwrites       do not overwrite post-processed files; the post-
                               processed files are overwritten by default
    --embed-subs               embed subtitles in the video (only for mp4
                               videos)
 # CONFIGURATION
--- a/devscripts/bash-completion.in
+++ b/devscripts/bash-completion.in
@ -4,8 +4,12 @@ __youtube-dl()
    COMPREPLY=()
    cur="${COMP_WORDS[COMP_CWORD]}"
    opts="{{flags}}"
    keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater"
    if [[ ${cur} == * ]] ; then
    if [[ ${cur} =~ : ]]; then
        COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
        return 0
    elif [[ ${cur} == * ]] ; then
        COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
        return 0
    fi
--- a/devscripts/gh-pages/add-version.py
+++ b/devscripts/gh-pages/add-version.py
@ -6,28 +6,35 @@ import hashlib
 import urllib.request
 if len(sys.argv) <= 1:
 	print('Specify the version number as parameter')
 	sys.exit()
    print('Specify the version number as parameter')
    sys.exit()
 version = sys.argv[1]
 with open('update/LATEST_VERSION', 'w') as f:
 	f.write(version)
    f.write(version)
 versions_info = json.load(open('update/versions.json'))
 if 'signature' in versions_info:
 	del versions_info['signature']
    del versions_info['signature']
 new_version = {}
 filenames = {'bin': 'youtube-dl', 'exe': 'youtube-dl.exe', 'tar': 'youtube-dl-%s.tar.gz' % version}
 filenames = {
    'bin': 'youtube-dl',
    'exe': 'youtube-dl.exe',
    'tar': 'youtube-dl-%s.tar.gz' % version}
 build_dir = os.path.join('..', '..', 'build', version)
 for key, filename in filenames.items():
 	print('Downloading and checksumming %s...' %filename)
 	url = 'http://youtube-dl.org/downloads/%s/%s' % (version, filename)
 	data = urllib.request.urlopen(url).read()
 	sha256sum = hashlib.sha256(data).hexdigest()
 	new_version[key] = (url, sha256sum)
    fn = os.path.join(build_dir, filename)
    with open(fn, 'rb') as f:
        data = f.read()
    if not data:
        raise ValueError('File %s is empty!' % fn)
    sha256sum = hashlib.sha256(data).hexdigest()
    new_version[key] = (url, sha256sum)
 versions_info['versions'][version] = new_version
 versions_info['latest'] = version
 json.dump(versions_info, open('update/versions.json', 'w'), indent=4, sort_keys=True)
 with open('update/versions.json', 'w') as jsonf:
    json.dump(versions_info, jsonf, indent=4, sort_keys=True)
--- a/devscripts/gh-pages/update-feed.py
+++ b/devscripts/gh-pages/update-feed.py
@ -22,7 +22,7 @@ entry_template=textwrap.dedent("""
 									<atom:link href="http://rg3.github.io/youtube-dl" />
 									<atom:content type="xhtml">
 										<div xmlns="http://www.w3.org/1999/xhtml">
 											Downloads available at <a href="http://youtube-dl.org/downloads/@VERSION@/">http://youtube-dl.org/downloads/@VERSION@/</a>
 											Downloads available at <a href="https://yt-dl.org/downloads/@VERSION@/">https://yt-dl.org/downloads/@VERSION@/</a>
 										</div>
 									</atom:content>
 									<atom:author>
@ -54,4 +54,3 @@ atom_template = atom_template.replace('@ENTRIES@', entries_str)
 with open('update/releases.atom','w',encoding='utf-8') as atom_file:
 	atom_file.write(atom_template)
--- a/devscripts/gh-pages/update-sites.py
+++ b/devscripts/gh-pages/update-sites.py
@ -0,0 +1,33 @@
 #!/usr/bin/env python3
 import sys
 import os
 import textwrap
 # We must be able to import youtube_dl
 sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
 import youtube_dl
 def main():
    """Generate supportedsites.html from the supportedsites.html.in template.

    Reads the template from the current directory, builds one <li> entry per
    extractor returned by youtube_dl.gen_extractors() (sorted by IE_NAME),
    substitutes the tab-indented list for the @SITES@ placeholder and writes
    the result to supportedsites.html.
    """
    def render_item(extractor):
        # Bold name, then the description when the extractor defines one.
        label = '<b>{}</b>'.format(extractor.IE_NAME)
        try:
            label += ': {}'.format(extractor.IE_DESC)
        except AttributeError:
            # No IE_DESC attribute on this extractor: name only.
            pass
        if extractor.working() == False:
            label += ' (Currently broken)'
        return '<li>{}</li>'.format(label)
    with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf:
        template = tmplf.read()
    extractors = sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME)
    items = [render_item(extractor) for extractor in extractors]
    sites_block = textwrap.indent('\n'.join(items), '\t')
    template = template.replace('@SITES@', sites_block)
    with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
        sitesf.write(template)
 if __name__ == '__main__':
    main()
--- a/devscripts/release.sh
+++ b/devscripts/release.sh
@ -67,7 +67,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
 (cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
 git checkout HEAD -- youtube-dl youtube-dl.exe
 /bin/echo -e "\n### Signing and uploading the new binaries to youtube-dl.org..."
 /bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..."
 for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done
 scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
 ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
@ -85,6 +85,7 @@ ROOT=$(pwd)
    "$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem"
    "$ROOT/devscripts/gh-pages/generate-download.py"
    "$ROOT/devscripts/gh-pages/update-copyright.py"
    "$ROOT/devscripts/gh-pages/update-sites.py"
    git add *.html *.html.in update
    git commit -m "release $version"
    git show HEAD
--- a/devscripts/youtube_genalgo.py
+++ b/devscripts/youtube_genalgo.py
@ -14,21 +14,21 @@ tests = [
    # 89 
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'",
     "/?;:|}<[{=+-_)(*&^%$#@!MqBVCXZASDFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuyt"),
    # 88
    # 88 - vflapUV9V 2013/08/28
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
     "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
     "ioplkjhgfdsazxcvbnm12<4567890QWERTYUIOZLKJHGFDSAeXCVBNM!@#$%^&*()_-+={[]}|:;?/>.3"),
    # 87
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
     "uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
    # 86
    # 86 - vfluy6kdb 2013/09/06
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
     "yuioplkjhgfdsazecvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"),
     "yuioplkjhgfdsazxcvbnm12345678q0QWrRTYUIOELKJHGFD-AZXCVBNM!@#$%^&*()_<+={[|};?/>.S"),
    # 85
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
     ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
    # 84
    # 84 - vflg0g8PQ 2013/08/29 (sporadic)
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
     "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543q1mnbvcxzasdfghjklpoiuew2"),
     ">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
    # 83
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
     ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@ -11,24 +11,49 @@ from youtube_dl.extractor import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE,
 from helper import get_testcases
 class TestAllURLsMatching(unittest.TestCase):
    def setUp(self):
        self.ies = gen_extractors()
    def matching_ies(self, url):
        return [ie.IE_NAME for ie in self.ies if ie.suitable(url) and ie.IE_NAME != 'generic']
    def assertMatch(self, url, ie_list):
        self.assertEqual(self.matching_ies(url), ie_list)
    def test_youtube_playlist_matching(self):
        self.assertTrue(YoutubePlaylistIE.suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
        self.assertTrue(YoutubePlaylistIE.suitable(u'UUBABnxM4Ar9ten8Mdjj1j0Q')) #585
        self.assertTrue(YoutubePlaylistIE.suitable(u'PL63F0C78739B09958'))
        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q'))
        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC'))
        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
        self.assertFalse(YoutubePlaylistIE.suitable(u'PLtS2H6bU1M'))
        assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
        assertPlaylist(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
        assertPlaylist(u'UUBABnxM4Ar9ten8Mdjj1j0Q') #585
        assertPlaylist(u'PL63F0C78739B09958')
        assertPlaylist(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
        assertPlaylist(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
        assertPlaylist(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
        assertPlaylist(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
        self.assertFalse('youtube:playlist' in self.matching_ies(u'PLtS2H6bU1M'))
    def test_youtube_matching(self):
        self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))
        self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
        self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
        self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
    def test_youtube_channel_matching(self):
        self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM'))
        self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec'))
        self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM/videos'))
        assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM')
        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
    def test_youtube_user_matching(self):
        self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
    def test_youtube_feeds(self):
        self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watch_later'])
        self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
        self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
        self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
    def test_youtube_show_matching(self):
        self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])
    def test_justin_tv_channelid_matching(self):
        self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
@ -63,15 +88,12 @@ class TestAllURLsMatching(unittest.TestCase):
                    self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
    def test_keywords(self):
        ies = gen_extractors()
        matching_ies = lambda url: [ie.IE_NAME for ie in ies
                                    if ie.suitable(url) and ie.IE_NAME != 'generic']
        self.assertEqual(matching_ies(':ytsubs'), ['youtube:subscriptions'])
        self.assertEqual(matching_ies(':ytsubscriptions'), ['youtube:subscriptions'])
        self.assertEqual(matching_ies(':thedailyshow'), ['ComedyCentral'])
        self.assertEqual(matching_ies(':tds'), ['ComedyCentral'])
        self.assertEqual(matching_ies(':colbertreport'), ['ComedyCentral'])
        self.assertEqual(matching_ies(':cr'), ['ComedyCentral'])
        self.assertMatch(':ytsubs', ['youtube:subscriptions'])
        self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
        self.assertMatch(':thedailyshow', ['ComedyCentral'])
        self.assertMatch(':tds', ['ComedyCentral'])
        self.assertMatch(':colbertreport', ['ComedyCentral'])
        self.assertMatch(':cr', ['ComedyCentral'])
 if __name__ == '__main__':
--- a/test/test_download.py
+++ b/test/test_download.py
@ -127,12 +127,11 @@ def generator(test_case):
                    info_dict = json.load(infof)
                for (info_field, expected) in tc.get('info_dict', {}).items():
                    if isinstance(expected, compat_str) and expected.startswith('md5:'):
                        self.assertEqual(expected, 'md5:' + md5(info_dict.get(info_field)))
                        got = 'md5:' + md5(info_dict.get(info_field))
                    else:
                        got = info_dict.get(info_field)
                        self.assertEqual(
                            expected, got,
                            u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
                    self.assertEqual(expected, got,
                        u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
                # If checkable fields are missing from the test case, print the info_dict
                test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@ -63,6 +63,17 @@ class FileDownloader(object):
        converted = float(bytes) / float(1024 ** exponent)
        return '%.2f%s' % (converted, suffix)
    @staticmethod
    def format_seconds(seconds):
        """Format a duration given in seconds as a clock string.

        Returns 'MM:SS' when the duration is shorter than one hour,
        'HH:MM:SS' otherwise, and the placeholder '--:--:--' when it
        exceeds 99 hours.
        """
        (mins, secs) = divmod(seconds, 60)
        # Reduce the minutes modulo 60 as well; otherwise the HH:MM:SS form
        # would show the total minute count (e.g. '01:61:01' for 3661 s).
        (hours, mins) = divmod(mins, 60)
        if hours > 99:
            return '--:--:--'
        if hours == 0:
            return '%02d:%02d' % (mins, secs)
        return '%02d:%02d:%02d' % (hours, mins, secs)
    @staticmethod
    def calc_percent(byte_counter, data_len):
        if data_len is None:
@ -78,14 +89,7 @@ class FileDownloader(object):
            return '--:--'
        rate = float(current) / dif
        eta = int((float(total) - float(current)) / rate)
        (eta_mins, eta_secs) = divmod(eta, 60)
        (eta_hours, eta_mins) = divmod(eta_mins, 60)
        if eta_hours > 99:
            return '--:--:--'
        if eta_hours == 0:
            return '%02d:%02d' % (eta_mins, eta_secs)
        else:
            return '%02d:%02d:%02d' % (eta_hours, eta_mins, eta_secs)
        return FileDownloader.format_seconds(eta)
    @staticmethod
    def calc_speed(start, now, bytes):
@ -234,12 +238,14 @@ class FileDownloader(object):
        """Report it was impossible to resume download."""
        self.to_screen(u'[download] Unable to resume')
    def report_finish(self):
    def report_finish(self, data_len_str, tot_time):
        """Report download finished."""
        if self.params.get('noprogress', False):
            self.to_screen(u'[download] Download completed')
        else:
            self.to_screen(u'')
            clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
            self.to_screen(u'\r%s[download] 100%% of %s in %s' %
                (clear_line, data_len_str, self.format_seconds(tot_time)))
    def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
        self.report_destination(filename)
@ -542,7 +548,7 @@ class FileDownloader(object):
            self.report_error(u'Did not get any data blocks')
            return False
        stream.close()
        self.report_finish()
        self.report_finish(data_len_str, (time.time() - start))
        if data_len is not None and byte_counter != data_len:
            raise ContentTooShortError(byte_counter, int(data_len))
        self.try_rename(tmpfilename, filename)
--- a/youtube_dl/PostProcessor.py
+++ b/youtube_dl/PostProcessor.py
@ -137,7 +137,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
        try:
            FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
        except FFmpegPostProcessorError as err:
            raise AudioConversionError(err.message)
            raise AudioConversionError(err.msg)
    def run(self, information):
        path = information['filepath']
@ -207,7 +207,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
        except:
            etype,e,tb = sys.exc_info()
            if isinstance(e, AudioConversionError):
                msg = u'audio conversion failed: ' + e.message
                msg = u'audio conversion failed: ' + e.msg
            else:
                msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg')
            raise PostProcessingError(msg)
@ -458,6 +458,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
        opts.extend(['-f', 'mp4'])
        temp_filename = filename + u'.temp'
        self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename)
        self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
        os.remove(encodeFilename(filename))
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@ -76,7 +76,7 @@ class YoutubeDL(object):
    allsubtitles:      Downloads all the subtitles of the video
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs:     Language of the subtitles to download
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
@ -97,6 +97,7 @@ class YoutubeDL(object):
    def __init__(self, params):
        """Create a FileDownloader object with the given options."""
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
@ -111,8 +112,21 @@ class YoutubeDL(object):
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list.

        The instance is appended to self._ies, stored in
        self._ies_instances under the key returned by ie.ie_key()
        (replacing any instance previously stored under that key), and
        given this object as its downloader via ie.set_downloader().
        """
        self._ies.append(ie)
        self._ies_instances[ie.ie_key()] = ie
        ie.set_downloader(self)
    def get_info_extractor(self, ie_key):
        """
        Return the IE instance registered under ie_key.

        The instance is looked up in the _ies_instances mapping; when none
        is stored there, the class returned by the module-level
        get_info_extractor(ie_key) is instantiated, registered through
        add_info_extractor and returned.
        """
        cached = self._ies_instances.get(ie_key)
        if cached is not None:
            return cached
        created = get_info_extractor(ie_key)()
        self.add_info_extractor(created)
        return created
    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
@ -294,9 +308,7 @@ class YoutubeDL(object):
         '''
        if ie_key:
            ie = get_info_extractor(ie_key)()
            ie.set_downloader(self)
            ies = [ie]
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies
@ -448,7 +460,8 @@ class YoutubeDL(object):
        if self.params.get('forceid', False):
            compat_print(info_dict['id'])
        if self.params.get('forceurl', False):
            compat_print(info_dict['url'])
            # For RTMP URLs, also include the playpath
            compat_print(info_dict['url'] + info_dict.get('play_path', u''))
        if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
            compat_print(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and 'description' in info_dict:
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@ -28,6 +28,7 @@ __authors__  = (
    'Axel Noack',
    'Albert Kim',
    'Pierre Rudloff',
    'Huarong Huo',
 )
 __license__ = 'Public Domain'
@ -45,6 +46,7 @@ import sys
 import warnings
 import platform
 from .utils import *
 from .update import update_self
 from .version import __version__
@ -99,6 +101,16 @@ def parseOpts(overrideArguments=None):
            pass
        return None
    def _hide_login_info(opts):
        opts = list(opts)
        for private_opt in ['-p', '--password', '-u', '--username']:
            try:
                i = opts.index(private_opt)
                opts[i+1] = '<PRIVATE>'
            except ValueError:
                pass
        return opts
    max_width = 80
    max_help_position = 80
@ -181,7 +193,7 @@ def parseOpts(overrideArguments=None):
    video_format.add_option('-f', '--format',
            action='store', dest='format', metavar='FORMAT',
            help='video format code, specifiy the order of preference using slashes: "-f 22/17/18"')
            help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
    video_format.add_option('--all-formats',
            action='store_const', dest='format', help='download all available video formats', const='all')
    video_format.add_option('--prefer-free-formats',
@ -354,9 +366,9 @@ def parseOpts(overrideArguments=None):
        argv = systemConf + userConf + commandLineConf
        opts, args = parser.parse_args(argv)
        if opts.verbose:
            sys.stderr.write(u'[debug] System config: ' + repr(systemConf) + '\n')
            sys.stderr.write(u'[debug] User config: ' + repr(userConf) + '\n')
            sys.stderr.write(u'[debug] Command-line args: ' + repr(commandLineConf) + '\n')
            sys.stderr.write(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
            sys.stderr.write(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
            sys.stderr.write(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
    return parser, opts, args
@ -427,6 +439,10 @@ def _real_main(argv=None):
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)
    https_handler = make_HTTPS_handler(opts)
    opener = compat_urllib_request.build_opener(https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders =[]
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
@ -604,7 +620,7 @@ def _real_main(argv=None):
                sys.exc_clear()
            except:
                pass
        sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform()) + u'\n')
        sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
        sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
    ydl.add_default_info_extractors()
--- a/youtube_dl/aes.py
+++ b/youtube_dl/aes.py
@ -0,0 +1,202 @@
 __all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_decrypt_text']
 import base64
 from math import ceil
 from .utils import bytes_to_intlist, intlist_to_bytes
 BLOCK_SIZE_BYTES = 16
 def aes_ctr_decrypt(data, key, counter):
    """
    Decrypt data with AES in counter mode

    @param {int[]} data        cipher
    @param {int[]} key         16/24/32-Byte cipher key
    @param {instance} counter  Object whose next_value() method returns the
                               next 16-Byte counter block as {int[]}
    @returns {int[]}           decrypted data, same length as 'data'
    """
    round_keys = key_expansion(key)
    total_blocks = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
    plain = []
    for index in range(total_blocks):
        keystream_input = counter.next_value()
        start = index * BLOCK_SIZE_BYTES
        chunk = data[start : start + BLOCK_SIZE_BYTES]
        # Zero-pad a short final block to a full block before the XOR
        chunk = chunk + [0] * (BLOCK_SIZE_BYTES - len(chunk))
        keystream = aes_encrypt(keystream_input, round_keys)
        plain.extend(xor(chunk, keystream))
    # Drop the bytes that only correspond to the padding
    return plain[:len(data)]
 def key_expansion(data):
    """
    Generate key schedule

    The input list is copied, not modified. Each pass of the main loop
    appends key_size_bytes new bytes, built four at a time by XOR-ing the
    last four bytes (optionally transformed) with the four bytes located
    key_size_bytes before the end of the schedule.

    @param {int[]} data  16/24/32-Byte cipher key
    @returns {int[]}     176/208/240-Byte expanded key
    """
    data = data[:] # copy
    rcon_iteration = 1
    key_size_bytes = len(data)
    # 16 -> 176, 24 -> 208, 32 -> 240 bytes
    expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES

    while len(data) < expanded_key_size_bytes:
        # First word of the pass: last word goes through the key schedule
        # core (rotate, substitute, round constant) before the XOR.
        temp = data[-4:]
        temp = key_schedule_core(temp, rcon_iteration)
        rcon_iteration += 1
        data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])

        # Next three words: plain XOR of the last word with the word
        # key_size_bytes back.
        for _ in range(3):
            temp = data[-4:]
            data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])

        # 32-byte keys only: one extra word with an S-box substitution.
        if key_size_bytes == 32:
            temp = data[-4:]
            temp = sub_bytes(temp)
            data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])

        # Remaining plain words of the pass: 3 for 32-byte keys, 2 for
        # 24-byte keys, none for 16-byte keys.
        for _ in range(3 if key_size_bytes == 32  else 2 if key_size_bytes == 24 else 0):
            temp = data[-4:]
            data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
    # The last pass may overshoot; cut to the exact schedule length.
    data = data[:expanded_key_size_bytes]

    return data
 def aes_encrypt(data, expanded_key):
    """
    Encrypt a single block with AES

    @param {int[]} data          16-Byte state
    @param {int[]} expanded_key  176/208/240-Byte expanded key
    @returns {int[]}             16-Byte cipher
    """
    # One round key is consumed up front, the rest one per round
    rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
    state = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
    for round_no in range(1, rounds + 1):
        state = shift_rows(sub_bytes(state))
        # The final round skips the column mixing step
        if round_no != rounds:
            state = mix_columns(state)
        offset = round_no * BLOCK_SIZE_BYTES
        round_key = expanded_key[offset : offset + BLOCK_SIZE_BYTES]
        state = xor(state, round_key)
    return state
 def aes_decrypt_text(data, password, key_size_bytes):
    """
    Decrypt text
    - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter
    - The cipher key is retrieved by encrypting the first 16 Byte of 'password'
      with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's)
    - Mode of operation is 'counter'

    @param {str} data                    Base64 encoded string
    @param {str,unicode} password        Password (will be encoded with utf-8)
    @param {int} key_size_bytes          Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit
    @returns                             Decrypted data, as returned by intlist_to_bytes
    """
    NONCE_LENGTH_BYTES = 8

    data = bytes_to_intlist(base64.b64decode(data))
    password = bytes_to_intlist(password.encode('utf-8'))

    # Truncate or zero-pad the password to key_size_bytes, then derive the
    # real key by encrypting its first block with itself as the key.
    key = password[:key_size_bytes] + [0]*(key_size_bytes - len(password))
    # The 16-Byte result is repeated key_size_bytes // 16 times.
    # NOTE(review): for key_size_bytes == 24 the integer division gives 1,
    # i.e. a 16-Byte key rather than 24 Bytes -- confirm this is intended.
    key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)

    nonce = data[:NONCE_LENGTH_BYTES]
    cipher = data[NONCE_LENGTH_BYTES:]

    # Counter block: the nonce followed by zero bytes, incremented by inc()
    # after each use; next_value() returns the value before incrementing.
    class Counter:
        __value = nonce + [0]*(BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)
        def next_value(self):
            temp = self.__value
            self.__value = inc(self.__value)
            return temp

    decrypted_data = aes_ctr_decrypt(cipher, key, Counter())
    plaintext = intlist_to_bytes(decrypted_data)

    return plaintext
 RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
 SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
        0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
        0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
        0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
        0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
        0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
        0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
        0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
        0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
        0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
        0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
        0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
        0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
        0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
        0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
        0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16)
 MIX_COLUMN_MATRIX = ((2,3,1,1),
                     (1,2,3,1),
                     (1,1,2,3),
                     (3,1,1,2))
 def sub_bytes(data):
    """Return a new list with every value of 'data' replaced by its SBOX entry."""
    substituted = []
    for value in data:
        substituted.append(SBOX[value])
    return substituted
 def rotate(data):
    """Return a new list with the first element of 'data' moved to the end."""
    tail = data[1:]
    head = data[0]
    return tail + [head]
 def key_schedule_core(data, rcon_iteration):
    """
    Transform one word for the key schedule: rotate it, substitute its
    values through sub_bytes and XOR the first value with
    RCON[rcon_iteration]. Returns the transformed word as a new list.
    """
    word = sub_bytes(rotate(data))
    word[0] ^= RCON[rcon_iteration]
    return word
 def xor(data1, data2):
    """
    Return the element-wise XOR of two sequences as a list; the result is
    as long as the shorter input.
    """
    combined = []
    for left, right in zip(data1, data2):
        combined.append(left ^ right)
    return combined
 def mix_column(data):
    """
    Multiply one 4-value column by MIX_COLUMN_MATRIX and return the
    resulting 4-value list. Doubling reduces values above 0xff with the
    constant 0x1b; a coefficient of 3 is doubling plus the original value.
    """
    mixed_column = []
    for coefficients in MIX_COLUMN_MATRIX:
        accumulator = 0
        for position, coefficient in enumerate(coefficients):
            value = data[position]
            if coefficient in (2, 3):
                term = value << 1
                if term > 0xff:
                    term = (term & 0xff) ^ 0x1b
                if coefficient == 3:
                    term ^= value
            else:
                term = value
            accumulator ^= term & 0xff
        mixed_column.append(accumulator)
    return mixed_column
 def mix_columns(data):
    """
    Apply mix_column to each of the four consecutive 4-value groups of
    'data' and return the concatenated results.
    """
    mixed = []
    for start in range(0, 16, 4):
        mixed.extend(mix_column(data[start : start + 4]))
    return mixed
 def shift_rows(data):
    """
    Return the 16-value state with its rows shifted: output position
    (column, row) takes the input value from column (column + row) mod 4
    of the same row. Values are stored column after column.
    """
    return [data[((column + row) % 4) * 4 + row]
            for column in range(4)
            for row in range(4)]
 def inc(data):
    """
    Return a copy of 'data' incremented by one, treating the list as a
    big-endian number with values 0..255; a list of all 255s wraps to zeros.
    """
    result = data[:]
    for position in reversed(range(len(result))):
        if result[position] != 255:
            result[position] += 1
            break
        # Carry: this position rolls over, continue with the next higher one
        result[position] = 0
    return result
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -1,3 +1,5 @@
 from .appletrailers import AppleTrailersIE
 from .addanime import AddAnimeIE
 from .archiveorg import ArchiveOrgIE
 from .ard import ARDIE
 from .arte import ArteTvIE
@ -6,16 +8,21 @@ from .bandcamp import BandcampIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .breakcom import BreakIE
 from .brightcove import BrightcoveIE
 from .c56 import C56IE
 from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
 from .cnn import CNNIE
 from .collegehumor import CollegeHumorIE
 from .comedycentral import ComedyCentralIE
 from .condenast import CondeNastIE
 from .criterion import CriterionIE
 from .cspan import CSpanIE
 from .dailymotion import DailymotionIE, DailymotionPlaylistIE
 from .daum import DaumIE
 from .depositfiles import DepositFilesIE
 from .dotsub import DotsubIE
 from .dreisat import DreiSatIE
 from .defense import DefenseGouvFrIE
 from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .escapist import EscapistIE
@ -29,6 +36,7 @@ from .gametrailers import GametrailersIE
 from .generic import GenericIE
 from .googleplus import GooglePlusIE
 from .googlesearch import GoogleSearchIE
 from .hark import HarkIE
 from .hotnewhiphop import HotNewHipHopIE
 from .howcast import HowcastIE
 from .hypem import HypemIE
@ -44,23 +52,30 @@ from .keek import KeekIE
 from .liveleak import LiveLeakIE
 from .livestream import LivestreamIE
 from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
 from .mit import TechTVMITIE, MITIE
 from .mixcloud import MixcloudIE
 from .mtv import MTVIE
 from .muzu import MuzuTVIE
 from .myspass import MySpassIE
 from .myvideo import MyVideoIE
 from .naver import NaverIE
 from .nba import NBAIE
 from .nbc import NBCNewsIE
 from .ooyala import OoyalaIE
 from .orf import ORFIE
 from .pbs import PBSIE
 from .photobucket import PhotobucketIE
 from .pornotube import PornotubeIE
 from .rbmaradio import RBMARadioIE
 from .redtube import RedTubeIE
 from .ringtv import RingTVIE
 from .ro220 import Ro220IE
 from .roxwel import RoxwelIE
 from .rtlnow import RTLnowIE
 from .sina import SinaIE
 from .slashdot import SlashdotIE
 from .sohu import SohuIE
 from .soundcloud import SoundcloudIE, SoundcloudSetIE
 from .spiegel import SpiegelIE
 from .stanfordoc import StanfordOpenClassroomIE
@ -71,18 +86,19 @@ from .ted import TEDIE
 from .tf1 import TF1IE
 from .thisav import ThisAVIE
 from .traileraddict import TrailerAddictIE
 from .trilulilu import TriluliluIE
 from .tudou import TudouIE
 from .tumblr import TumblrIE
 from .tutv import TutvIE
 from .ustream import UstreamIE
 from .unistra import UnistraIE
 from .ustream import UstreamIE
 from .vbox7 import Vbox7IE
 from .veehd import VeeHDIE
 from .veoh import VeohIE
 from .vevo import VevoIE
 from .videofyme import VideofyMeIE
 from .vimeo import VimeoIE, VimeoChannelIE
 from .vine import VineIE
 from .c56 import C56IE
 from .wat import WatIE
 from .weibo import WeiboIE
 from .wimp import WimpIE
@ -116,12 +132,14 @@ _ALL_CLASSES = [
 ]
 _ALL_CLASSES.append(GenericIE)
 def gen_extractors():
    """Instantiate every class in _ALL_CLASSES and return the instances as a list.

    The order matters: the first extractor that matches a URL is the one
    that handles it.
    """
    extractors = []
    for ie_class in _ALL_CLASSES:
        extractors.append(ie_class())
    return extractors
 def get_info_extractor(ie_name):
    """Return the info extractor class named `ie_name` + 'IE'.

    The class is looked up in this module's globals; an unknown name raises
    KeyError.
    """
    class_name = ie_name + 'IE'
    return globals()[class_name]
--- a/youtube_dl/extractor/addanime.py
+++ b/youtube_dl/extractor/addanime.py
@ -0,0 +1,75 @@
 import re
 from .common import InfoExtractor
 from ..utils import (
    compat_HTTPError,
    compat_str,
    compat_urllib_parse,
    compat_urllib_parse_urlparse,
    ExtractorError,
 )
 class AddAnimeIE(InfoExtractor):
    """Information extractor for add-anime.net ``watch_video.php`` pages."""
    # The dot in "watch_video.php" is escaped so it only matches a literal dot.
    _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
    IE_NAME = u'AddAnime'
    _TEST = {
        u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
        u'file': u'24MR3YO5SAS9.flv',
        u'md5': u'1036a0e0cd307b95bd8a8c3a5c8cfaf1',
        u'info_dict': {
            u"description": u"One Piece 606",
            u"title": u"One Piece 606"
        }
    }
    def _real_extract(self, url):
        """Return the info dict for the video page at `url`.

        If the page download fails with an ExtractorError caused by a
        compat_HTTPError, the error body is handled as a challenge page:
        the form action, the ``jschl_vc`` value and an ``a + b * c``
        expression are read from it, the answer (expression result plus the
        length of the URL's host) is submitted, and the page is downloaded
        again. Any other ExtractorError is re-raised unchanged.

        Raises ExtractorError when the arithmetic task cannot be found or
        when one of the searched patterns is missing.
        """
        # Matching the URL cannot raise ExtractorError, so it stays outside
        # the try block; video_id is then always bound inside the handler.
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('video_id')
        try:
            webpage = self._download_webpage(url, video_id)
        except ExtractorError as ee:
            if not isinstance(ee.cause, compat_HTTPError):
                raise
            redir_webpage = ee.cause.read().decode('utf-8')
            action = self._search_regex(
                r'<form id="challenge-form" action="([^"]+)"',
                redir_webpage, u'Redirect form')
            vc = self._search_regex(
                r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>',
                redir_webpage, u'redirect vc value')
            av = re.search(
                r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
                redir_webpage)
            if av is None:
                raise ExtractorError(u'Cannot find redirect math task')
            av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3))
            parsed_url = compat_urllib_parse_urlparse(url)
            # The expected answer is the task result plus the host length.
            av_val = av_res + len(parsed_url.netloc)
            confirm_url = (
                parsed_url.scheme + u'://' + parsed_url.netloc +
                action + '?' +
                compat_urllib_parse.urlencode({
                    'jschl_vc': vc, 'jschl_answer': compat_str(av_val)}))
            self._download_webpage(
                confirm_url, video_id,
                note=u'Confirming after redirect')
            webpage = self._download_webpage(url, video_id)
        video_url = self._search_regex(r"var normal_video_file = '(.*?)';",
                                       webpage, u'video file URL')
        video_title = self._og_search_title(webpage)
        video_description = self._og_search_description(webpage)
        return {
            '_type': 'video',
            'id':  video_id,
            'url': video_url,
            'ext': 'flv',
            'title': video_title,
            'description': video_description
        }
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@ -0,0 +1,166 @@
 import re
 import xml.etree.ElementTree
 from .common import InfoExtractor
 from ..utils import (
    determine_ext,
 )
 class AppleTrailersIE(InfoExtractor):
    """Information extractor for trailers.apple.com movie pages.

    A movie page is returned as a playlist with one entry per trailer found
    in the page's ``includes/playlists/web.inc`` snippet.
    """
    # Dots in the host name are escaped so they only match literal dots.
    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
    _TEST = {
        u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
        u"playlist": [
            {
                u"file": u"manofsteel-trailer4.mov",
                u"md5": u"11874af099d480cc09e103b189805d5f",
                u"info_dict": {
                    u"duration": 111,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_11624.jpg",
                    u"title": u"Trailer 4",
                    u"upload_date": u"20130523",
                    u"uploader_id": u"wb",
                },
            },
            {
                u"file": u"manofsteel-trailer3.mov",
                u"md5": u"07a0a262aae5afe68120eed61137ab34",
                u"info_dict": {
                    u"duration": 182,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_10793.jpg",
                    u"title": u"Trailer 3",
                    u"upload_date": u"20130417",
                    u"uploader_id": u"wb",
                },
            },
            {
                u"file": u"manofsteel-trailer.mov",
                u"md5": u"e401fde0813008e3307e54b6f384cff1",
                u"info_dict": {
                    u"duration": 148,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_8703.jpg",
                    u"title": u"Trailer",
                    u"upload_date": u"20121212",
                    u"uploader_id": u"wb",
                },
            },
            {
                u"file": u"manofsteel-teaser.mov",
                u"md5": u"76b392f2ae9e7c98b22913c10a639c97",
                u"info_dict": {
                    u"duration": 93,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_6899.jpg",
                    u"title": u"Teaser",
                    u"upload_date": u"20120721",
                    u"uploader_id": u"wb",
                },
            }
        ]
    }
    def _real_extract(self, url):
        """Return a playlist dict with one video entry per listed trailer.

        For every ``<li>`` of the playlist snippet the title, thumbnail,
        upload date and duration are read, then each ``OverlayPanel`` link
        is followed through its size page and detail page to collect the
        available formats. The last collected format also provides the
        entry's top-level 'url' and 'ext'.
        """
        mobj = re.match(self._VALID_URL, url)
        movie = mobj.group('movie')
        uploader_id = mobj.group('company')
        playlist_url = url.partition(u'?')[0] + u'/includes/playlists/web.inc'
        playlist_snippet = self._download_webpage(playlist_url, movie)
        # Script elements are stripped and the snippet is wrapped in a single
        # root element before it is handed to the XML parser.
        playlist_cleaned = re.sub(r'(?s)<script>.*?</script>', u'', playlist_snippet)
        playlist_html = u'<html>' + playlist_cleaned + u'</html>'
        # Parsed size pages, keyed by size id, so each is downloaded once.
        size_cache = {}
        doc = xml.etree.ElementTree.fromstring(playlist_html)
        playlist = []
        for li in doc.findall('./div/ul/li'):
            title = li.find('.//h3').text
            video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
            thumbnail = li.find('.//img').attrib['src']
            date_el = li.find('.//p')
            upload_date = None
            m = re.search(r':\s?(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<year>[0-9]{2})', date_el.text)
            if m:
                upload_date = u'20' + m.group('year') + m.group('month') + m.group('day')
            # The runtime is read from the text that follows the <br> element.
            runtime_el = date_el.find('./br')
            m = re.search(r':\s?(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime_el.tail)
            duration = None
            if m:
                duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
            formats = []
            for formats_el in li.findall('.//a'):
                # .get() so that links without a class attribute are skipped
                # instead of raising KeyError.
                if formats_el.get('class') != 'OverlayPanel':
                    continue
                target = formats_el.attrib['target']
                format_code = formats_el.text
                if 'Automatic' in format_code:
                    continue
                size_q = formats_el.attrib['href']
                size_id = size_q.rpartition('#videos-')[2]
                if size_id not in size_cache:
                    size_url = url + size_q
                    sizepage_html = self._download_webpage(
                        size_url, movie,
                        note=u'Downloading size info %s' % size_id,
                        errnote=u'Error while downloading size info %s' % size_id,
                    )
                    _doc = xml.etree.ElementTree.fromstring(sizepage_html)
                    size_cache[size_id] = _doc
                sizepage_doc = size_cache[size_id]
                links = sizepage_doc.findall('.//{http://www.w3.org/1999/xhtml}ul/{http://www.w3.org/1999/xhtml}li/{http://www.w3.org/1999/xhtml}a')
                for vid_a in links:
                    href = vid_a.get('href')
                    if not href.endswith(target):
                        continue
                    detail_q = href.partition('#')[0]
                    detail_url = url + '/' + detail_q
                    m = re.match(r'includes/(?P<detail_id>[^/]+)/', detail_q)
                    detail_id = m.group('detail_id')
                    detail_html = self._download_webpage(
                        detail_url, movie,
                        note=u'Downloading detail %s %s' % (detail_id, size_id),
                        errnote=u'Error while downloading detail %s %s' % (detail_id, size_id)
                    )
                    detail_doc = xml.etree.ElementTree.fromstring(detail_html)
                    movie_link_el = detail_doc.find('.//{http://www.w3.org/1999/xhtml}a')
                    assert movie_link_el.get('class') == 'movieLink'
                    # The query string is dropped and every '_' becomes '_h'.
                    movie_link = movie_link_el.get('href').partition('?')[0].replace('_', '_h')
                    ext = determine_ext(movie_link)
                    assert ext == 'mov'
                    formats.append({
                        'format': format_code,
                        'ext': ext,
                        'url': movie_link,
                    })
            # 'title' appears once; the original literal listed it twice.
            info = {
                '_type': 'video',
                'id': video_id,
                'title': title,
                'formats': formats,
                'duration': duration,
                'thumbnail': thumbnail,
                'upload_date': upload_date,
                'uploader_id': uploader_id,
                'user_agent': 'QuickTime compatible (youtube-dl)',
            }
            # TODO: Remove when #980 has been merged
            info['url'] = formats[-1]['url']
            info['ext'] = formats[-1]['ext']
            playlist.append(info)
        return {
            '_type': 'playlist',
            'id': movie,
            'entries': playlist,
        }
--- a/youtube_dl/extractor/c56.py
+++ b/youtube_dl/extractor/c56.py
@ -12,8 +12,8 @@ class C56IE(InfoExtractor):
    _TEST ={
        u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
        u'file': u'93440716.mp4',
        u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e',
        u'file': u'93440716.flv',
        u'md5': u'e59995ac63d0457783ea05f93f12a866',
        u'info_dict': {
            u'title': u'网事知多少 第32期：车怒',
        },
--- a/youtube_dl/extractor/canalc2.py
+++ b/youtube_dl/extractor/canalc2.py
@ -0,0 +1,35 @@
 # coding: utf-8
 import re
 from .common import InfoExtractor
 class Canalc2IE(InfoExtractor):
    """Information extractor for canalc2.tv ``video.asp`` pages."""
    # Named IE_NAME (not _IE_NAME) to match the attribute the other
    # extractors set and the IE_NAME property defined on InfoExtractor.
    IE_NAME = 'canalc2.tv'
    _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui'
    _TEST = {
        u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
        u'file': u'12163.mp4',
        u'md5': u'060158428b650f896c542dfbb3d6487f',
        u'info_dict': {
            u'title': u'Terrasses du Numérique'
        }
    }
    def _real_extract(self, url):
        """Return the info dict for the video page at `url`.

        The video id is the first group of _VALID_URL; the file name passed
        to ``so.addVariable('file', ...)`` in the page is appended to the
        fixed streaming host to build the video URL.
        """
        video_id = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, video_id)
        file_name = self._search_regex(
            r"so\.addVariable\('file','(.*?)'\);",
            webpage, 'file name')
        video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name
        title = self._html_search_regex(
            r'class="evenement8">(.*?)</a>', webpage, u'title')
        return {'id': video_id,
                'ext': 'mp4',
                'url': video_url,
                'title': title,
                }
--- a/youtube_dl/extractor/canalplus.py
+++ b/youtube_dl/extractor/canalplus.py
@ -5,7 +5,7 @@ from .common import InfoExtractor
 from ..utils import unified_strdate
 class CanalplusIE(InfoExtractor):
    _VALID_URL = r'https?://www\.canalplus\.fr/.*?\?vid=(?P<id>\d+)'
    _VALID_URL = r'https?://(www\.canalplus\.fr/.*?\?vid=|player\.canalplus\.fr/#/)(?P<id>\d+)'
    _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
    IE_NAME = u'canalplus.fr'
--- a/youtube_dl/extractor/cnn.py
+++ b/youtube_dl/extractor/cnn.py
@ -0,0 +1,58 @@
 import re
 import xml.etree.ElementTree
 from .common import InfoExtractor
 from ..utils import determine_ext
 class CNNIE(InfoExtractor):
    """Information extractor for cnn.com / edition.cnn.com video pages."""
    _VALID_URL = r'''(?x)https?://(edition\.)?cnn\.com/video/(data/.+?|\?)/
        (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
    _TESTS = [{
        u'url': u'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
        u'file': u'sports_2013_06_09_nadal-1-on-1.cnn.mp4',
        u'md5': u'3e6121ea48df7e2259fe73a0628605c4',
        u'info_dict': {
            u'title': u'Nadal wins 8th French Open title',
            u'description': u'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
        },
    },
    {
        u"url": u"http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
        u"file": u"us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4",
        u"md5": u"b5cc60c60a3477d185af8f19a2a26f4e",
        u"info_dict": {
            u"title": "Student's epic speech stuns new freshmen",
            u"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\""
        }
    }]
    def _real_extract(self, url):
        """Return the info dict for the video at `url`.

        The video's ``index.xml`` is downloaded and parsed. Every
        ``files/file`` entry whose ``bitrate`` attribute looks like
        ``<width>x<height>[_<n>k]`` is a candidate; the candidate that sorts
        highest by (width, height, n) is used. Thumbnails are sorted by
        (height, width) and the largest becomes 'thumbnail', or None when
        the XML lists no images.
        """
        mobj = re.match(self._VALID_URL, url)
        path = mobj.group('path')
        page_title = mobj.group('title')
        info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
        info_xml = self._download_webpage(info_url, page_title)
        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
        formats = []
        for f in info.findall('files/file'):
            mf = re.match(r'(\d+)x(\d+)(?:_(.*)k)?',f.attrib['bitrate'])
            if mf is not None:
                formats.append((int(mf.group(1)), int(mf.group(2)), int(mf.group(3) or 0), f.text))
        formats = sorted(formats)
        (_,_,_, video_path) = formats[-1]
        video_url = 'http://ht.cdn.turner.com/cnn/big%s' % video_path
        thumbnails = sorted([((int(t.attrib['height']),int(t.attrib['width'])), t.text) for t in info.findall('images/image')])
        thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails]
        return {'id': info.attrib['id'],
                'title': info.find('headline').text,
                'url': video_url,
                'ext': determine_ext(video_url),
                # No <image> entries means no thumbnail, not an IndexError.
                'thumbnail': thumbnails[-1][1] if thumbnails else None,
                'thumbnails': thumbs_dict,
                'description': info.find('description').text,
                }
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -114,6 +114,11 @@ class InfoExtractor(object):
        """Real extraction process. Redefine in subclasses."""
        pass
    @classmethod
    def ie_key(cls):
        """Return the class name without its last two characters; this is
        the string to pass to get_info_extractor."""
        class_name = cls.__name__
        return class_name[:-2]
    @property
    def IE_NAME(self):
        """Default extractor name: the instance's class name minus its last
        two characters."""
        klass = type(self)
        return klass.__name__[:-2]
@ -129,7 +134,7 @@ class InfoExtractor(object):
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            if errnote is None:
                errnote = u'Unable to download webpage'
            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2])
            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2], cause=err)
    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
        """ Returns a tuple (page content as string, URL handle) """
@ -140,12 +145,17 @@ class InfoExtractor(object):
        urlh = self._request_webpage(url_or_request, video_id, note, errnote)
        content_type = urlh.headers.get('Content-Type', '')
        webpage_bytes = urlh.read()
        m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
        if m:
            encoding = m.group(1)
        else:
            encoding = 'utf-8'
        webpage_bytes = urlh.read()
            m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]',
                          webpage_bytes[:1024])
            if m:
                encoding = m.group(1).decode('ascii')
            else:
                encoding = 'utf-8'
        if self._downloader.params.get('dump_intermediate_pages', False):
            try:
                url = url_or_request.get_full_url()
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@ -37,14 +37,14 @@ class DailyMotionSubtitlesIE(NoAutoSubtitlesIE):
 class DailymotionIE(DailyMotionSubtitlesIE, InfoExtractor):
    """Information Extractor for Dailymotion"""
    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
    IE_NAME = u'dailymotion'
    _TEST = {
        u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
        u'file': u'x33vw9.mp4',
        u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
        u'info_dict': {
            u"uploader": u"Alex and Van .", 
            u"uploader": u"Amphora Alex and Van .", 
            u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
        }
    }
@ -56,6 +56,7 @@ class DailymotionIE(DailyMotionSubtitlesIE, InfoExtractor):
        video_id = mobj.group(1).split('_')[0].split('?')[0]
        video_extension = 'mp4'
        url = 'http://www.dailymotion.com/video/%s' % video_id
        # Retrieve video webpage to extract further information
        request = compat_urllib_request.Request(url)
@ -78,7 +79,8 @@ class DailymotionIE(DailyMotionSubtitlesIE, InfoExtractor):
        embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
        embed_page = self._download_webpage(embed_url, video_id,
                                            u'Downloading embed page')
        info = self._search_regex(r'var info = ({.*?}),', embed_page, 'video info')
        info = self._search_regex(r'var info = ({.*?}),$', embed_page,
            'video info', flags=re.MULTILINE)
        info = json.loads(info)
        # TODO: support choosing qualities
--- a/youtube_dl/extractor/daum.py
+++ b/youtube_dl/extractor/daum.py
@ -0,0 +1,74 @@
 # encoding: utf-8
 import re
 import xml.etree.ElementTree
 from .common import InfoExtractor
 from ..utils import (
    compat_urllib_parse,
    determine_ext,
 )
 class DaumIE(InfoExtractor):
    """Information extractor for tvpot.daum.net clips."""
    _VALID_URL = r'https?://tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
    IE_NAME = u'daum.net'
    _TEST = {
        u'url': u'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
        u'file': u'52554690.mp4',
        u'info_dict': {
            u'title': u'DOTA 2GETHER 시즌2 6회 - 2부',
            u'description': u'DOTA 2GETHER 시즌2 6회 - 2부',
            u'upload_date': u'20130831',
            u'duration': 3868,
        },
    }
    def _real_extract(self, url):
        """Return the info dict for the clip at `url`.

        The canonical page yields the full video id; with it the clip info
        XML and the format list XML are downloaded, and one more request per
        listed profile resolves that format's URL. The last format's fields
        are merged into the top level of the returned dict.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(
            'http://tvpot.daum.net/v/%s' % video_id, video_id)
        full_id = self._search_regex(
            r'<link rel="video_src" href=".+?vid=(.+?)"',
            webpage, u'full id')
        query = compat_urllib_parse.urlencode({'vid': full_id})
        info_xml = self._download_webpage(
            'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
            u'Downloading video info')
        urls_xml = self._download_webpage(
            'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
            video_id, u'Downloading video formats info')
        info_doc = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
        urls_doc = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
        self.to_screen(u'%s: Getting video urls' % video_id)
        formats = []
        for output_el in urls_doc.findall('result/output_list/output_list'):
            profile = output_el.attrib['profile']
            location_query = compat_urllib_parse.urlencode({
                'vid': full_id,
                'profile': profile,
            })
            # One request per profile; note=False is passed through as-is.
            location_xml = self._download_webpage(
                'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + location_query,
                video_id, note=False)
            location_doc = xml.etree.ElementTree.fromstring(location_xml.encode('utf-8'))
            format_url = location_doc.find('result/url').text
            formats.append({
                'url': format_url,
                'ext': determine_ext(format_url),
                'format_id': profile,
            })
        result = {
            'id': video_id,
            'title': info_doc.find('TITLE').text,
            'formats': formats,
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': info_doc.find('CONTENTS').text,
            'duration': int(info_doc.find('DURATION').text),
            # only the first 8 characters of REGDTTM are kept
            'upload_date': info_doc.find('REGDTTM').text[:8],
        }
        # TODO: Remove when #980 has been merged
        result.update(formats[-1])
        return result
--- a/youtube_dl/extractor/defense.py
+++ b/youtube_dl/extractor/defense.py
@ -0,0 +1,39 @@
 import re
 import json
 from .common import InfoExtractor
 class DefenseGouvFrIE(InfoExtractor):
    """Information extractor for defense.gouv.fr webtv lightbox pages."""
    # Named IE_NAME (not _IE_NAME) to match the attribute the other
    # extractors set and the IE_NAME property defined on InfoExtractor.
    IE_NAME = 'defense.gouv.fr'
    _VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/'
        r'ligthboxvideo/base-de-medias/webtv/(.*)')
    _TEST = {
        u'url': (u'http://www.defense.gouv.fr/layout/set/ligthboxvideo/'
        u'base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1'),
        u'file': u'11213.mp4',
        u'md5': u'75bba6124da7e63d2d60b5244ec9430c',
        "info_dict": {
            "title": "attaque-chimique-syrienne-du-21-aout-2013-1"
        }
    }
    def _real_extract(self, url):
        """Return the info dict for the video page at `url`.

        The title is the trailing part of the URL captured by _VALID_URL.
        The numeric id found in the page's ``flashvars.pvg_id`` assignment
        selects a JSON document whose first rendition provides the video URL.
        """
        title = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, title)
        video_id = self._search_regex(
            r"flashvars.pvg_id=\"(\d+)\";",
            webpage, 'ID')
        json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
            + video_id)
        info = self._download_webpage(json_url, title,
                                                  'Downloading JSON config')
        # Only the first entry of 'renditions' is used.
        video_url = json.loads(info)['renditions'][0]['url']
        return {'id': video_id,
                'ext': 'mp4',
                'url': video_url,
                'title': title,
                }
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -8,11 +8,13 @@ from ..utils import (
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_request,
    compat_urlparse,
    ExtractorError,
 )
 from .brightcove import BrightcoveIE
 class GenericIE(InfoExtractor):
    IE_DESC = u'Generic downloader that works on some sites'
    _VALID_URL = r'.*'
@ -23,7 +25,7 @@ class GenericIE(InfoExtractor):
            u'file': u'13601338388002.mp4',
            u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',
            u'info_dict': {
                u"uploader": u"www.hodiho.fr", 
                u"uploader": u"www.hodiho.fr",
                u"title": u"R\u00e9gis plante sa Jeep"
            }
        },
@ -107,6 +109,11 @@ class GenericIE(InfoExtractor):
        return new_url
    def _real_extract(self, url):
        parsed_url = compat_urlparse.urlparse(url)
        if not parsed_url.scheme:
            self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
            return self.url_result('http://' + url)
        try:
            new_url = self._test_redirect(url)
            if new_url:
@ -124,7 +131,7 @@ class GenericIE(InfoExtractor):
            raise ExtractorError(u'Invalid URL: %s' % url)
        self.report_extraction(video_id)
        # Look for BrigthCove:
        # Look for BrightCove:
        m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
        if m_brightcove is not None:
            self.to_screen(u'Brightcove video detected.')
@ -151,7 +158,7 @@ class GenericIE(InfoExtractor):
                mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
        if mobj is None:
            # HTML5 video
            mobj = re.search(r'<video[^<]*>.*?<source .*?src="([^"]+)"', webpage, flags=re.DOTALL)
            mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
@ -160,8 +167,9 @@ class GenericIE(InfoExtractor):
        if mobj.group(1) is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_url = compat_urllib_parse.unquote(mobj.group(1))
        video_id = os.path.basename(video_url)
        video_url = mobj.group(1)
        video_url = compat_urlparse.urljoin(url, video_url)
        video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
        # here's a fun little line of code for you:
        video_extension = os.path.splitext(video_id)[1][1:]
--- a/youtube_dl/extractor/googleplus.py
+++ b/youtube_dl/extractor/googleplus.py
@ -57,8 +57,8 @@ class GooglePlusIE(InfoExtractor):
            webpage, 'title', default=u'NA')
        # Step 2, Simulate clicking the image box to launch video
        DOMAIN = 'https://plus.google.com'
        video_page = self._search_regex(r'<a href="((?:%s)?/photos/.*?)"' % re.escape(DOMAIN),
        DOMAIN = 'https://plus.google.com/'
        video_page = self._search_regex(r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN),
            webpage, u'video page URL')
        if not video_page.startswith(DOMAIN):
            video_page = DOMAIN + video_page
--- a/youtube_dl/extractor/hark.py
+++ b/youtube_dl/extractor/hark.py
@ -0,0 +1,37 @@
 # -*- coding: utf-8 -*-
 import re
 import json
 from .common import InfoExtractor
 from ..utils import determine_ext
 class HarkIE(InfoExtractor):
    """Information extractor for hark.com clips, read from the clip's .json document."""
    _VALID_URL = r'https?://www\.hark\.com/clips/(.+?)-.+'
    _TEST = {
        u'url': u'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
        u'file': u'mmbzyhkgny.mp3',
        u'md5': u'6783a58491b47b92c7c1af5a77d4cbee',
        u'info_dict': {
            u'title': u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' on May 23, 2013",
            u'description': u'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.',
            u'duration': 11,
        }
    }
    def _real_extract(self, url):
        """Return the info dict for the clip at `url`.

        The clip id is the first group of _VALID_URL; every returned field
        other than the id and extension comes straight from the clip's JSON.
        """
        video_id = re.match(self._VALID_URL, url).group(1)
        json_url = "http://www.hark.com/clips/%s.json" % video_id
        info = json.loads(self._download_webpage(json_url, video_id))
        final_url = info['url']
        result = {
            'id': video_id,
            'url': final_url,
            'title': info['name'],
            'ext': determine_ext(final_url),
            'description': info['description'],
            'thumbnail': info['image_original'],
            'duration': info['duration'],
        }
        return result
--- a/youtube_dl/extractor/ign.py
+++ b/youtube_dl/extractor/ign.py
@ -13,7 +13,7 @@ class IGNIE(InfoExtractor):
    Some videos of it.ign.com are also supported
    """
    _VALID_URL = r'https?://.+?\.ign\.com/(?:videos|show_videos)(/.+)?/(?P<name_or_id>.+)'
    _VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles)(/.+)?/(?P<name_or_id>.+)'
    IE_NAME = u'ign.com'
    _CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
@ -41,7 +41,11 @@ class IGNIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        name_or_id = mobj.group('name_or_id')
        page_type = mobj.group('type')
        webpage = self._download_webpage(url, name_or_id)
        if page_type == 'articles':
            video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, u'video url')
            return self.url_result(video_url, ie='IGN')
        video_id = self._find_video_id(webpage)
        result = self._get_video_info(video_id)
        description = self._html_search_regex(self._DESCRIPTION_RE,
@ -68,7 +72,7 @@ class IGNIE(InfoExtractor):
 class OneUPIE(IGNIE):
    """Extractor for 1up.com, it uses the ign videos system."""
    _VALID_URL = r'https?://gamevideos.1up.com/video/id/(?P<name_or_id>.+)'
    _VALID_URL = r'https?://gamevideos.1up.com/(?P<type>video)/id/(?P<name_or_id>.+)'
    IE_NAME = '1up.com'
    _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
--- a/youtube_dl/extractor/kankan.py
+++ b/youtube_dl/extractor/kankan.py
@ -21,8 +21,10 @@ class KankanIE(InfoExtractor):
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        title = self._search_regex(r'G_TITLE=[\'"](.+?)[\'"]', webpage, u'video title')
        gcid = self._search_regex(r'lurl:[\'"]http://.+?/.+?/(.+?)/', webpage, u'gcid')
        title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, u'video title')
        surls = re.search(r'surls:\[\'.+?\'\]|lurl:\'.+?\.flv\'', webpage).group(0)
        gcids = re.findall(r"http://.+?/.+?/(.+?)/", surls)
        gcid = gcids[-1]
        video_info_page = self._download_webpage('http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid,
                                                 video_id, u'Downloading video url info')
--- a/youtube_dl/extractor/metacafe.py
+++ b/youtube_dl/extractor/metacafe.py
@ -122,7 +122,7 @@ class MetacafeIE(InfoExtractor):
        video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, u'title')
        description = self._og_search_description(webpage)
        video_uploader = self._html_search_regex(
                r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("channel","([^"]+)"\);',
                r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
                webpage, u'uploader nickname', fatal=False)
        return {
--- a/youtube_dl/extractor/metacritic.py
+++ b/youtube_dl/extractor/metacritic.py
@ -0,0 +1,55 @@
 import re
 import xml.etree.ElementTree
 import operator
 from .common import InfoExtractor
 class MetacriticIE(InfoExtractor):
    """Information extractor for trailers hosted on metacritic.com."""

    _VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)'
    _TEST = {
        u'url': u'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
        u'file': u'3698222.mp4',
        u'info_dict': {
            u'title': u'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
            u'description': u'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
            u'duration': 221,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the trailer referenced by *url*.

        Downloads the trailer page (used for the description) and the
        ``video_data`` XML document (used for title, duration and formats).
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        data_url = 'http://www.metacritic.com/video_data?video=' + video_id
        raw_xml = self._download_webpage(
            data_url, video_id, u'Downloading info xml')
        # The xml is not well formatted, there are raw '&'; escape them
        # before handing the document to the parser.
        escaped_xml = raw_xml.replace('&', '&amp;')
        playlist_doc = xml.etree.ElementTree.fromstring(escaped_xml.encode('utf-8'))

        # Select the playlist entry whose <id> equals the requested video id
        matching_clips = (
            node for node in playlist_doc.findall('playList/clip')
            if node.find('id').text == video_id)
        clip = next(matching_clips)

        formats = []
        for file_node in clip.findall('httpURI/videoFile'):
            rate = file_node.find('rate').text
            file_path = file_node.find('filePath').text
            formats.append({
                'url': file_path,
                'ext': 'mp4',
                'format_id': rate,
                'rate': int(rate),
            })
        # Ascending by rate: the entry with the highest rate ends up last
        formats = sorted(formats, key=operator.itemgetter('rate'))

        description = self._html_search_regex(
            r'<b>Description:</b>(.*?)</p>', webpage, u'description',
            flags=re.DOTALL)

        result = {
            'id': video_id,
            'title': clip.find('title').text,
            'formats': formats,
            'description': description,
            'duration': int(clip.find('duration').text),
        }
        # TODO: Remove when #980 has been merged
        result.update(formats[-1])
        return result
--- a/youtube_dl/extractor/mit.py
+++ b/youtube_dl/extractor/mit.py
@ -0,0 +1,74 @@
 import re
 import json
 from .common import InfoExtractor
 from ..utils import (
    clean_html,
    get_element_by_id,
 )
 class TechTVMITIE(InfoExtractor):
    """Information extractor for videos hosted on techtv.mit.edu."""

    IE_NAME = u'techtv.mit.edu'
    _VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)'
    _TEST = {
        u'url': u'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
        u'file': u'25418.mp4',
        u'md5': u'1f8cb3e170d41fd74add04d3c9330e5f',
        u'info_dict': {
            u'title': u'MIT DNA Learning Center Set',
            u'description': u'md5:82313335e8a8a3f243351ba55bc1b474',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video referenced by *url*.

        The ``/videos/<id>`` page is always the one downloaded, whichever
        of the two url forms accepted by ``_VALID_URL`` was given.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        page_url = 'http://techtv.mit.edu/videos/%s' % video_id
        raw_page = self._download_webpage(page_url, video_id)
        # Title and description are looked up in a copy of the page with
        # the html comments removed.
        comment_re = re.compile(u'<!--.*?-->', re.S)
        clean_page = comment_re.sub(u'', raw_page)

        base_url = self._search_regex(
            r'ipadUrl: \'(.+?cloudfront.net/)', raw_page, u'base url')
        bitrates_json = self._search_regex(
            r'bitrates: (\[.+?\])', raw_page, u'video formats')
        formats = json.loads(bitrates_json)
        # Ascending by bitrate: the last entry has the highest bitrate
        formats.sort(key=lambda entry: entry['bitrate'])

        title = get_element_by_id('edit-title', clean_page)
        description = clean_html(
            get_element_by_id('edit-description', clean_page))
        thumbnail = self._search_regex(
            r'playlist:.*?url: \'(.+?)\'', raw_page, u'thumbnail',
            flags=re.DOTALL)

        result = {
            'id': video_id,
            'title': title,
            'url': base_url + formats[-1]['url'].replace('mp4:', ''),
            'ext': 'mp4',
            'description': description,
            'thumbnail': thumbnail,
        }
        return result
 class MITIE(TechTVMITIE):
    """Information extractor for video.mit.edu watch pages.

    The page's first matching iframe url is handed over to the TechTVMIT
    extractor.
    """

    IE_NAME = u'video.mit.edu'
    _VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)'
    _TEST = {
        u'url': u'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
        u'file': u'21783.mp4',
        u'md5': u'7db01d5ccc1895fc5010e9c9e13648da',
        u'info_dict': {
            u'title': u'The Government is Profiling You',
            u'description': u'md5:ad5795fe1e1623b73620dbfd47df9afd',
        },
    }

    def _real_extract(self, url):
        """Return a url result pointing at the embedded player url."""
        page_title = re.match(self._VALID_URL, url).group('title')
        webpage = self._download_webpage(url, page_title)
        self.to_screen(
            '%s: Extracting %s url' % (page_title, TechTVMITIE.IE_NAME))
        embed_url = self._search_regex(
            r'<iframe .*?src="(.+?)"', webpage, u'embed url')
        return self.url_result(embed_url, ie='TechTVMIT')
--- a/youtube_dl/extractor/naver.py
+++ b/youtube_dl/extractor/naver.py
@ -0,0 +1,73 @@
 # encoding: utf-8
 import re
 import xml.etree.ElementTree
 from .common import InfoExtractor
 from ..utils import (
    compat_urllib_parse,
    ExtractorError,
 )
 class NaverIE(InfoExtractor):
    """Information extractor for videos on tvcast.naver.com."""

    _VALID_URL = r'https?://tvcast\.naver\.com/v/(?P<id>\d+)'
    _TEST = {
        u'url': u'http://tvcast.naver.com/v/81652',
        u'file': u'81652.mp4',
        u'info_dict': {
            u'title': u'[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
            u'description': u'합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
            u'upload_date': u'20130903',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video referenced by *url*.

        The video page provides a ``vid``/``inKey`` pair, which is used to
        query two XML documents: one with the video metadata and one with
        the available encodings.

        Raises ExtractorError if the vid/key pair cannot be found in the
        page, or if no non-rtmp format is available.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        webpage = self._download_webpage(url, video_id)
        m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
            webpage)
        if m_id is None:
            raise ExtractorError(u'couldn\'t extract vid and key')
        vid = m_id.group(1)
        key = m_id.group(2)
        query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key,})
        query_urls = compat_urllib_parse.urlencode({
            'masterVid': vid,
            'protocol': 'p2p',
            'inKey': key,
        })
        info_xml = self._download_webpage(
            'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
            video_id, u'Downloading video info')
        urls_xml = self._download_webpage(
            'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
            video_id, u'Downloading video formats info')
        # Kept under their own names so the parsed XML is not confused with
        # the result dict built below.
        info_doc = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
        urls_doc = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))

        formats = []
        for format_el in urls_doc.findall('EncodingOptions/EncodingOption'):
            domain = format_el.find('Domain').text
            # rtmp entries are skipped
            if domain.startswith('rtmp'):
                continue
            formats.append({
                'url': domain + format_el.find('uri').text,
                'ext': 'mp4',
                'width': int(format_el.find('width').text),
                'height': int(format_el.find('height').text),
            })
        # Without this check the formats[-1] lookup below would fail with a
        # bare IndexError instead of a proper extraction error.
        if not formats:
            raise ExtractorError(u'No non-rtmp formats available for this video')

        info = {
            'id': video_id,
            'title': info_doc.find('Subject').text,
            'formats': formats,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            # WriteDate contains dots, which are removed for upload_date
            'upload_date': info_doc.find('WriteDate').text.replace('.', ''),
            'view_count': int(info_doc.find('PlayCount').text),
        }
        # TODO: Remove when #980 has been merged
        info.update(formats[-1])
        return info
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@ -0,0 +1,33 @@
 import re
 import xml.etree.ElementTree
 from .common import InfoExtractor
 from ..utils import find_xpath_attr, compat_str
 class NBCNewsIE(InfoExtractor):
    """Information extractor for nbcnews.com video pages."""

    _VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)'
    _TEST = {
        u'url': u'http://www.nbcnews.com/video/nbc-news/52753292',
        u'file': u'52753292.flv',
        u'md5': u'47abaac93c6eaf9ad37ee6c4463a5179',
        u'info_dict': {
            u'title': u'Crew emerges after four-month Mars food study',
            u'description': u'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
        },
    }

    def _real_extract(self, url):
        """Return the info dict built from the video's XML description."""
        video_id = re.match(self._VALID_URL, url).group('id')
        info_url = 'http://www.nbcnews.com/id/%s/displaymode/1219' % video_id
        info_xml = self._download_webpage(info_url, video_id)
        root = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
        video_el = root.find('video')

        title = video_el.find('headline').text
        # Media url and thumbnail are both <media> elements, told apart by
        # their 'type' attribute.
        video_url = find_xpath_attr(video_el, 'media', 'type', 'flashVideo').text
        description = compat_str(video_el.find('caption').text)
        thumbnail = find_xpath_attr(video_el, 'media', 'type', 'thumbnail').text

        result = {
            'id': video_id,
            'title': title,
            'ext': 'flv',
            'url': video_url,
            'description': description,
            'thumbnail': thumbnail,
        }
        return result
--- a/youtube_dl/extractor/orf.py
+++ b/youtube_dl/extractor/orf.py
@ -0,0 +1,54 @@
 # coding: utf-8
 import re
 import xml.etree.ElementTree
 import json
 from .common import InfoExtractor
 from ..utils import (
    compat_urlparse,
    ExtractorError,
    find_xpath_attr,
 )
 class ORFIE(InfoExtractor):
    """Information extractor for episode and topic pages on tvthek.orf.at."""

    # The dots of the host name are escaped so that they only match a
    # literal '.' and not an arbitrary character.
    _VALID_URL = r'https?://tvthek\.orf\.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'

    def _real_extract(self, url):
        """Return a list with one video info dict per playlist item.

        The page embeds a flash XML configuration (source of the video
        urls) and a JSON playlist (source of the ids and titles); the two
        are walked in parallel.

        Raises ExtractorError if an item offers none of the known
        qualities.
        """
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')
        webpage = self._download_webpage(url, playlist_id)

        # parse_qs is used to decode the embedded value
        # (presumably percent-encoded -- confirm against a live page)
        flash_xml = self._search_regex('ORF.flashXML = \'(.+?)\'', webpage, u'flash xml')
        flash_xml = compat_urlparse.parse_qs('xml='+flash_xml)['xml'][0]
        flash_config = xml.etree.ElementTree.fromstring(flash_xml.encode('utf-8'))
        # The double quotes of the embedded JSON are backslash-escaped
        playlist_json = self._search_regex(r'playlist\': \'(\[.*?\])\'', webpage, u'playlist').replace(r'\"','"')
        playlist = json.loads(playlist_json)

        videos = []
        ns = '{http://tempuri.org/XMLSchema.xsd}'
        xpath = '%(ns)sPlaylist/%(ns)sItems/%(ns)sItem' % {'ns': ns}
        # Fallback description for entries without their own paragraph
        webpage_description = self._og_search_description(webpage)
        for (i, (item, info)) in enumerate(zip(flash_config.findall(xpath), playlist), 1):
            # Get best quality url: the first quality found in this order wins
            rtmp_url = None
            for q in ['Q6A', 'Q4A', 'Q1A']:
                video_url = find_xpath_attr(item, '%sVideoUrl' % ns, 'quality', q)
                if video_url is not None:
                    rtmp_url = video_url.text
                    break
            if rtmp_url is None:
                raise ExtractorError(u'Couldn\'t get video url: %s' % info['id'])
            description = self._html_search_regex(
                r'id="playlist_entry_%s".*?<p>(.*?)</p>' % i, webpage,
                u'description', default=webpage_description, flags=re.DOTALL)
            videos.append({
                '_type': 'video',
                'id': info['id'],
                'title': info['title'],
                'url': rtmp_url,
                'ext': 'flv',
                'description': description,
                })
        return videos
--- a/youtube_dl/extractor/ro220.py
+++ b/youtube_dl/extractor/ro220.py
@ -0,0 +1,42 @@
 import re
 from .common import InfoExtractor
 from ..utils import (
    clean_html,
    compat_parse_qs,
 )
 class Ro220IE(InfoExtractor):
    """Information extractor for videos on 220.ro."""

    IE_NAME = '220.ro'
    _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
    _TEST = {
        u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
        u'file': u'LYV6doKo7f.mp4',
        u'md5': u'03af18b73a07b4088753930db7a34add',
        u'info_dict': {
            u"title": u"Luati-le Banii sez 4 ep 1",
            u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
        }
    }

    def _real_extract(self, url):
        """Return the info dict read from the player's flashVars."""
        video_id = re.match(self._VALID_URL, url).group('video_id')
        webpage = self._download_webpage(url, video_id)

        # All metadata is taken from the query string stored in the
        # flashVars <param> of the player.
        raw_flash_vars = self._search_regex(
            r'<param name="flashVars" value="([^"]+)"',
            webpage, u'flashVars')
        flash_vars = compat_parse_qs(raw_flash_vars)

        video_url = flash_vars['videoURL'][0]
        title = flash_vars['title'][0]
        description = clean_html(flash_vars['desc'][0])
        thumbnail = flash_vars['preview'][0]

        return {
            '_type': 'video',
            'id': video_id,
            'ext': 'mp4',
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }
--- a/youtube_dl/extractor/rtlnow.py
+++ b/youtube_dl/extractor/rtlnow.py
@ -8,8 +8,8 @@ from ..utils import (
 )
 class RTLnowIE(InfoExtractor):
    """Information Extractor for RTLnow, RTL2now and VOXnow"""
    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl(?:(?P<is_rtl2>2)|-)now\.rtl(?(is_rtl2)2|)\.de/|(?:www\.)?voxnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
    """Information Extractor for RTL NOW, RTL2 NOW, SUPER RTL NOW and VOX NOW"""
    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?superrtlnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
    _TESTS = [{
        u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
        u'file': u'90419.flv',
@ -48,6 +48,19 @@ class RTLnowIE(InfoExtractor):
        u'params': {
            u'skip_download': True,
        },
    },
    {
        u'url': u'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
        u'file': u'99205.flv',
        u'info_dict': {
            u'upload_date': u'20080928', 
            u'title': u'Medicopter 117 - Angst!',
            u'description': u'Angst!',
            u'thumbnail': u'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg'
        },
        u'params': {
            u'skip_download': True,
        },
    }]
    def _real_extract(self,url):
--- a/youtube_dl/extractor/sohu.py
+++ b/youtube_dl/extractor/sohu.py
@ -0,0 +1,90 @@
 # encoding: utf-8
 import json
 import re
 from .common import InfoExtractor
 from ..utils import ExtractorError
 class SohuIE(InfoExtractor):
    """Information extractor for videos on tv.sohu.com."""

    _VALID_URL = r'https?://tv\.sohu\.com/\d+?/n(?P<id>\d+)\.shtml.*?'
    _TEST = {
        u'url': u'http://tv.sohu.com/20130724/n382479172.shtml#super',
        u'file': u'382479172.mp4',
        u'md5': u'bde8d9a6ffd82c63a1eefaef4eeefec7',
        u'info_dict': {
            u'title': u'MV：Far East Movement《The Illest》',
        },
    }

    def _real_extract(self, url):
        """Return a video info dict, or a playlist when there are several parts.

        Raises ExtractorError if none of the known qualities is available.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        def _fetch_data(vid_id):
            # Download and decode the JSON description of one format id
            vid_str = str(vid_id)
            data_json = self._download_webpage(
                u'http://hot.vrs.sohu.com/vrs_flash.action?vid=' + vid_str,
                video_id,
                note=u'Downloading JSON data for ' + vid_str)
            return json.loads(data_json)

        webpage = self._download_webpage(url, video_id)
        raw_title = self._html_search_regex(
            r'(?s)<title>(.+?)</title>', webpage, u'video title')
        # Only the text before the first '-' of the page title is kept
        title = raw_title.partition('-')[0].strip()

        vid = self._html_search_regex(
            r'var vid="(\d+)"', webpage, u'video path')
        data = _fetch_data(vid)

        QUALITIES = ('ori', 'super', 'high', 'nor')
        vid_ids = []
        for quality in QUALITIES:
            candidate = data['data'][quality + 'Vid']
            # An id of 0 is treated as "not available"
            if candidate != 0:
                vid_ids.append(candidate)
        if not vid_ids:
            raise ExtractorError(u'No formats available for this video')

        # For now, we just pick the highest available quality
        # NOTE(review): this takes the LAST available entry of QUALITIES;
        # if that tuple is ordered best-to-worst this is actually the
        # lowest quality -- confirm the intended order.
        vid_id = vid_ids[-1]

        # NOTE(review): `vid` is a string taken from the page, while
        # `vid_id` comes from JSON (possibly an int); if the types differ
        # this comparison never succeeds and the data is fetched again.
        if vid == vid_id:
            format_data = data
        else:
            format_data = _fetch_data(vid_id)

        part_count = format_data['data']['totalBlocks']
        allot = format_data['allot']
        prot = format_data['prot']
        clipsURL = format_data['data']['clipsURL']
        su = format_data['data']['su']

        playlist = []
        for index in range(part_count):
            part_no = index + 1
            part_url = ('http://%s/?prot=%s&file=%s&new=%s' %
                        (allot, prot, clipsURL[index], su[index]))
            part_str = self._download_webpage(
                part_url, video_id,
                note=u'Downloading part %d of %d' % (part_no, part_count))
            # The answer is a '|' separated record; fields 0 and 3 are used
            # to build the final url.
            fields = part_str.split('|')
            playlist.append({
                'id': '%s_part%02d' % (video_id, part_no),
                'title': title,
                'url': '%s%s?key=%s' % (fields[0], su[index], fields[3]),
                'ext': 'mp4',
            })

        if len(playlist) == 1:
            # A single part is returned as a plain video with the bare id
            single = playlist[0]
            single['id'] = video_id
            return single
        return {
            '_type': 'playlist',
            'entries': playlist,
            'id': video_id,
        }
--- a/youtube_dl/extractor/trilulilu.py
+++ b/youtube_dl/extractor/trilulilu.py
@ -0,0 +1,73 @@
 import json
 import re
 import xml.etree.ElementTree
 from .common import InfoExtractor
 class TriluliluIE(InfoExtractor):
    """Information extractor for videos on trilulilu.ro."""

    _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?trilulilu\.ro/video-(?P<category>[^/]+)/(?P<video_id>[^/]+)'
    _TEST = {
        u"url": u"http://www.trilulilu.ro/video-animatie/big-buck-bunny-1",
        u'file': u"big-buck-bunny-1.mp4",
        u'info_dict': {
            u"title": u"Big Buck Bunny",
            u"description": u":) pentru copilul din noi",
        },
        # Server ignores Range headers (--test)
        u"params": {
            u"skip_download": True
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video referenced by *url*.

        Title, thumbnail and description are read from the page's Open
        Graph data; the format list comes from a separate XML document
        whose address is built from the page's ``block_flash_vars``.
        """
        video_id = re.match(self._VALID_URL, url).group('video_id')
        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(webpage)
        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(webpage)

        # JSON object providing the server, hash and userid values used in
        # the urls below
        flash_vars_json = self._search_regex(
            r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, u'log info')
        flash_vars = json.loads(flash_vars_json)

        format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
                      u'video-formats2' % flash_vars)
        format_str = self._download_webpage(
            format_url, video_id,
            note=u'Downloading formats',
            errnote=u'Error while downloading formats')
        format_doc = xml.etree.ElementTree.fromstring(format_str)

        # '%%s' survives the first substitution as '%s' and is filled in
        # with the format name further down.
        video_url_template = (
            u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
            u'&source=site&hash=%(hash)s&username=%(userid)s&'
            u'key=ministhebest&format=%%s&sig=&exp=' %
            flash_vars)

        formats = []
        for fnode in format_doc.findall('./formats/format'):
            formats.append({
                'format': fnode.text,
                'url': video_url_template % fnode.text,
            })

        result = {
            '_type': 'video',
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }
        # TODO: Remove when #980 has been merged
        last_format = formats[-1]
        result['url'] = last_format['url']
        # The extension is the part of the format name before the first '-'
        result['ext'] = last_format['format'].partition('-')[0]
        return result
--- a/youtube_dl/extractor/unistra.py
+++ b/youtube_dl/extractor/unistra.py
@ -11,7 +11,7 @@ class UnistraIE(InfoExtractor):
        u'md5': u'736f605cfdc96724d55bb543ab3ced24',
        u'info_dict': {
            u'title': u'M!ss Yella',
            u'description': u'md5:75e8439a3e2981cd5d4b6db232e8fdfc',
            u'description': u'md5:104892c71bd48e55d70b902736b81bbf',
        },
    }
--- a/youtube_dl/extractor/veehd.py
+++ b/youtube_dl/extractor/veehd.py
@ -0,0 +1,56 @@
 import re
 import json
 from .common import InfoExtractor
 from ..utils import (
    compat_urlparse,
    get_element_by_id,
    clean_html,
 )
 class VeeHDIE(InfoExtractor):
    """Information extractor for veehd.com video pages."""

    # The dot of the host name is escaped so that it only matches a
    # literal '.' and not an arbitrary character.
    _VALID_URL = r'https?://veehd\.com/video/(?P<id>\d+)'
    _TEST = {
        u'url': u'http://veehd.com/video/4686958',
        u'file': u'4686958.mp4',
        u'info_dict': {
            u'title': u'Time Lapse View from Space ( ISS)',
            u'uploader_id': u'spotted',
            u'description': u'md5:f0094c4cf3a72e22bc4e4239ef767ad7',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video referenced by *url*.

        The media url is not in the video page itself: it is read from the
        JSON config of the player page, whose path is set on the
        ``#playeriframe`` element by a script in the video page.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        player_path = self._search_regex(r'\$\("#playeriframe"\).attr\({src : "(.+?)"',
            webpage, u'player path')
        # urljoin is applied because the path may be relative to the page
        player_url = compat_urlparse.urljoin(url, player_path)
        player_page = self._download_webpage(player_url, video_id,
            u'Downloading player page')
        config_json = self._search_regex(r'value=\'config=({.+?})\'',
            player_page, u'config json')
        config = json.loads(config_json)

        # The clip url is unquoted before use
        video_url = compat_urlparse.unquote(config['clip']['url'])
        # Only the text before the last '|' of the videoName element is kept
        title = clean_html(get_element_by_id('videoName', webpage).rpartition('|')[0])
        uploader_id = self._html_search_regex(r'<a href="/profile/\d+">(.+?)</a>',
            webpage, u'uploader')
        thumbnail = self._search_regex(r'<img id="veehdpreview" src="(.+?)"',
            webpage, u'thumbnail')
        description = self._html_search_regex(r'<td class="infodropdown".*?<div>(.*?)<ul',
            webpage, u'description', flags=re.DOTALL)

        return {
            '_type': 'video',
            'id': video_id,
            'title': title,
            'url': video_url,
            'ext': 'mp4',
            'uploader_id': uploader_id,
            'thumbnail': thumbnail,
            'description': description,
        }
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@ -44,6 +44,16 @@ class VimeoIE(InfoExtractor):
                u'title': u'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
            },
        },
        {
            u'url': u'http://player.vimeo.com/video/54469442',
            u'file': u'54469442.mp4',
            u'md5': u'619b811a4417aa4abe78dc653becf511',
            u'note': u'Videos that embed the url in the player page',
            u'info_dict': {
                u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software',
                u'uploader': u'The BLN & Business of Software',
            },
        },
    ]
    def _login(self):
@ -112,7 +122,8 @@ class VimeoIE(InfoExtractor):
        # Extract the config JSON
        try:
            config = webpage.split(' = {config:')[1].split(',assets:')[0]
            config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'],
                webpage, u'info section', flags=re.DOTALL)
            config = json.loads(config)
        except:
            if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
@ -132,12 +143,22 @@ class VimeoIE(InfoExtractor):
        video_uploader_id = config["video"]["owner"]["url"].split('/')[-1] if config["video"]["owner"]["url"] else None
        # Extract video thumbnail
        video_thumbnail = config["video"]["thumbnail"]
        video_thumbnail = config["video"].get("thumbnail")
        if video_thumbnail is None:
            _, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in config["video"]["thumbs"].items())[-1]
        # Extract video description
        video_description = get_element_by_attribute("itemprop", "description", webpage)
        if video_description: video_description = clean_html(video_description)
        else: video_description = u''
        video_description = None
        try:
            video_description = get_element_by_attribute("itemprop", "description", webpage)
            if video_description: video_description = clean_html(video_description)
        except AssertionError as err:
            # On some pages like (http://player.vimeo.com/video/54469442) the
            # html tags are not closed, python 2.6 cannot handle it
            if err.args[0] == 'we should not get here!':
                pass
            else:
                raise
        # Extract upload date
        video_upload_date = None
@ -154,14 +175,15 @@ class VimeoIE(InfoExtractor):
        # TODO bind to format param
        codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')]
        files = { 'hd': [], 'sd': [], 'other': []}
        config_files = config["video"].get("files") or config["request"].get("files")
        for codec_name, codec_extension in codecs:
            if codec_name in config["video"]["files"]:
                if 'hd' in config["video"]["files"][codec_name]:
            if codec_name in config_files:
                if 'hd' in config_files[codec_name]:
                    files['hd'].append((codec_name, codec_extension, 'hd'))
                elif 'sd' in config["video"]["files"][codec_name]:
                elif 'sd' in config_files[codec_name]:
                    files['sd'].append((codec_name, codec_extension, 'sd'))
                else:
                    files['other'].append((codec_name, codec_extension, config["video"]["files"][codec_name][0]))
                    files['other'].append((codec_name, codec_extension, config_files[codec_name][0]))
        for quality in ('hd', 'sd', 'other'):
            if len(files[quality]) > 0:
@ -173,8 +195,12 @@ class VimeoIE(InfoExtractor):
        else:
            raise ExtractorError(u'No known codec found')
        video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
                    %(video_id, sig, timestamp, video_quality, video_codec.upper())
        video_url = None
        if isinstance(config_files[video_codec], dict):
            video_url = config_files[video_codec][video_quality].get("url")
        if video_url is None:
            video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
                        %(video_id, sig, timestamp, video_quality, video_codec.upper())
        return [{
            'id':       video_id,
--- a/youtube_dl/extractor/wat.py
+++ b/youtube_dl/extractor/wat.py
@ -6,7 +6,6 @@ import re
 from .common import InfoExtractor
 from ..utils import (
    compat_urllib_parse,
    unified_strdate,
 )
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@ -3,7 +3,8 @@ import re
 from .common import InfoExtractor
 from ..utils import (
    compat_urllib_parse,
    unescapeHTML,
    determine_ext,
    ExtractorError,
 )
@ -36,15 +37,16 @@ class XHamsterIE(InfoExtractor):
            video_url = compat_urllib_parse.unquote(mobj.group('file'))
        else:
            video_url = mobj.group('server')+'/key='+mobj.group('file')
        video_extension = video_url.split('.')[-1]
        video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
            webpage, u'title')
        # Can't see the description anywhere in the UI
        # video_description = self._html_search_regex(r'<span>Description: </span>(?P<description>[^<]+)',
        #     webpage, u'description', fatal=False)
        # if video_description: video_description = unescapeHTML(video_description)
        # Only a few videos have a description
        mobj = re.search('<span>Description: </span>(?P<description>[^<]+)', webpage)
        if mobj:
            video_description = unescapeHTML(mobj.group('description'))
        else:
            video_description = None
        mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
        if mobj:
@ -62,9 +64,9 @@ class XHamsterIE(InfoExtractor):
        return [{
            'id':       video_id,
            'url':      video_url,
            'ext':      video_extension,
            'ext':      determine_ext(video_url),
            'title':    video_title,
            # 'description': video_description,
            'description': video_description,
            'upload_date': video_upload_date,
            'uploader_id': video_uploader_id,
            'thumbnail': video_thumbnail
--- a/youtube_dl/extractor/youporn.py
+++ b/youtube_dl/extractor/youporn.py
@ -12,14 +12,16 @@ from ..utils import (
    unescapeHTML,
    unified_strdate,
 )
 from ..aes import (
    aes_decrypt_text
 )
 class YouPornIE(InfoExtractor):
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'
    _TEST = {
        u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
        u'file': u'505835.mp4',
        u'md5': u'c37ddbaaa39058c76a7e86c6813423c1',
        u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89',
        u'info_dict': {
            u"upload_date": u"20101221", 
            u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?", 
@ -75,7 +77,15 @@ class YouPornIE(InfoExtractor):
        # Get all of the links from the page
        LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
        links = re.findall(LINK_RE, download_list_html)
        if(len(links) == 0):
        # Get link of hd video if available
        mobj = re.search(r'var encryptedQuality720URL = \'(?P<encrypted_video_url>[a-zA-Z0-9+/]+={0,2})\';', webpage)
        if mobj != None:
            encrypted_video_url = mobj.group(u'encrypted_video_url')
            video_url = aes_decrypt_text(encrypted_video_url, video_title, 32).decode('utf-8')
            links = [video_url] + links
        if not links:
            raise ExtractorError(u'ERROR: no known formats available for video')
        self.to_screen(u'Links found: %d' % len(links))
@ -112,7 +122,7 @@ class YouPornIE(InfoExtractor):
            self._print_formats(formats)
            return
        req_format = self._downloader.params.get('format', None)
        req_format = self._downloader.params.get('format', 'best')
        self.to_screen(u'Format: %s' % req_format)
        if req_format is None or req_format == 'best':
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -194,7 +194,7 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
    _VALID_URL = r"""^
                     (
                         (?:https?://)?                                       # http(s):// (optional)
                         (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|
                         (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
                            tube\.majestyc\.net/)                             # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
@ -205,15 +205,18 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                 v=
                             )
                         )?                                                   # optional -> youtube.com/xxxx is OK
                         ))
                         |youtu\.be/                                          # just youtu.be/xxxx
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]+)                                         # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     $"""
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Listed in order of quality
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13',
                          '95', '94', '93', '92', '132', '151',
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
                          # Apple HTTP Live Streaming
                          '96', '95', '94', '93', '92', '132', '151',
                          # 3D
                          '85', '84', '102', '83', '101', '82', '100',
                          # Dash video
@ -222,8 +225,10 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
                          # Dash audio
                          '141', '172', '140', '171', '139',
                          ]
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13',
                                      '95', '94', '93', '92', '132', '151',
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
                                      # Apple HTTP Live Streaming
                                      '96', '95', '94', '93', '92', '132', '151',
                                      # 3D
                                      '85', '102', '84', '101', '83', '100', '82',
                                      # Dash video
                                      '138', '248', '137', '247', '136', '246', '245',
@ -231,11 +236,18 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
                                      # Dash audio
                                      '172', '141', '171', '140', '139',
                                      ]
    _video_formats_map = {
        'flv': ['35', '34', '6', '5'],
        '3gp': ['36', '17', '13'],
        'mp4': ['38', '37', '22', '18'],
        'webm': ['46', '45', '44', '43'],
    }
    _video_extensions = {
        '13': '3gp',
        '17': 'mp4',
        '17': '3gp',
        '18': 'mp4',
        '22': 'mp4',
        '36': '3gp',
        '37': 'mp4',
        '38': 'mp4',
        '43': 'webm',
@ -252,7 +264,7 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
        '101': 'webm',
        '102': 'webm',
        # videos that use m3u8
        # Apple HTTP Live Streaming
        '92': 'mp4',
        '93': 'mp4',
        '94': 'mp4',
@ -293,6 +305,7 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
        '22': '720x1280',
        '34': '360x640',
        '35': '480x854',
        '36': '240x320',
        '37': '1080x1920',
        '38': '3072x4096',
        '43': '360x640',
@ -394,7 +407,7 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
            u"info_dict": {
                u"upload_date": u"20120506",
                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
                u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c",
                u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
                u"uploader": u"Icona Pop",
                u"uploader_id": u"IconaPop"
            }
@ -432,7 +445,7 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False
        if YoutubePlaylistIE.suitable(url): return False
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
    def report_video_webpage_download(self, video_id):
@ -465,15 +478,15 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
        elif len(s) == 89:
            return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
        elif len(s) == 88:
            return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
            return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
        elif len(s) == 87:
            return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
        elif len(s) == 86:
            return s[5:20] + s[2] + s[21:]
            return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
        elif len(s) == 85:
            return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
        elif len(s) == 84:
            return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27]
            return s[81:36:-1] + s[0] + s[35:2:-1]
        elif len(s) == 83:
            return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
        elif len(s) == 82:
@ -537,13 +550,25 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
            video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
        else:
            # Specific formats. We pick the first in a slash-delimeted sequence.
            # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
            # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
            # available in the specified format. For example,
            # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
            # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
            # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
            req_formats = req_format.split('/')
            video_url_list = None
            for rf in req_formats:
                if rf in url_map:
                    video_url_list = [(rf, url_map[rf])]
                    break
                if rf in self._video_formats_map:
                    for srf in self._video_formats_map[rf]:
                        if srf in url_map:
                            video_url_list = [(srf, url_map[srf])]
                            break
                    else:
                        continue
                    break
            if video_url_list is None:
                raise ExtractorError(u'requested format not available')
        return video_url_list
@ -558,7 +583,7 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
        manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
        formats_urls = _get_urls(manifest)
        for format_url in formats_urls:
            itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
            itag = self._search_regex(r'itag%3D(\d+?)/', format_url, 'itag')
            url_map[itag] = format_url
        return url_map
@ -860,8 +885,11 @@ class YoutubePlaylistIE(InfoExtractor):
            for entry in response['feed']['entry']:
                index = entry['yt$position']['$t']
                if 'media$group' in entry and 'media$player' in entry['media$group']:
                    videos.append((index, entry['media$group']['media$player']['url']))
                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
                    videos.append((
                        index,
                        'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
                    ))
        videos = [v[1] for v in sorted(videos)]
@ -927,13 +955,20 @@ class YoutubeChannelIE(InfoExtractor):
 class YoutubeUserIE(InfoExtractor):
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
    _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'
    @classmethod
    def suitable(cls, url):
        """Return True only if no other extractor in this module accepts url.

        The URL pattern of this extractor is very permissive, so it would
        also match URLs that can be handled by the other YouTube extractors.
        Every other module-level name ending in 'IE' is therefore asked
        first; if any of them claims the URL, this extractor declines it.
        """
        for name, klass in globals().items():
            if not name.endswith('IE') or klass is cls:
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)
    def _real_extract(self, url):
        # Extract username
        mobj = re.match(self._VALID_URL, url)
@ -956,13 +991,15 @@ class YoutubeUserIE(InfoExtractor):
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))
            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            # Extract video identifiers
            ids_in_page = []
            for mobj in re.finditer(self._VIDEO_INDICATOR, page):
                if mobj.group(1) not in ids_in_page:
                    ids_in_page.append(mobj.group(1))
            for entry in response['feed']['entry']:
                ids_in_page.append(entry['id']['$t'].split('/')[-1])
            video_ids.extend(ids_in_page)
            # A little optimization - if current page is not
@ -1101,7 +1138,7 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:o?rites)?'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True
    def _real_extract(self, url):
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -1,19 +1,20 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 import datetime
 import email.utils
 import errno
 import gzip
 import io
 import json
 import locale
 import os
 import platform
 import re
 import socket
 import sys
 import traceback
 import zlib
 import email.utils
 import socket
 import datetime
 try:
    import urllib.request as compat_urllib_request
@ -60,6 +61,11 @@ try:
 except ImportError: # Python 2
    import httplib as compat_http_client
 try:
    from urllib.error import HTTPError as compat_HTTPError
 except ImportError:  # Python 2
    from urllib2 import HTTPError as compat_HTTPError
 try:
    from subprocess import DEVNULL
    compat_subprocess_get_DEVNULL = lambda: DEVNULL
@ -207,7 +213,7 @@ if sys.version_info >= (2,7):
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z]+$', key)
        assert re.match(r'^[a-zA-Z@\s]*$', val)
        assert re.match(r'^[a-zA-Z0-9@\s]*$', val)
        expr = xpath + u"[@%s='%s']" % (key, val)
        return node.find(expr)
 else:
@ -489,7 +495,7 @@ def make_HTTPS_handler(opts):
 class ExtractorError(Exception):
    """Error during info extraction."""
    def __init__(self, msg, tb=None, expected=False):
    def __init__(self, msg, tb=None, expected=False, cause=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        """
@ -502,6 +508,7 @@ class ExtractorError(Exception):
        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
    def format_traceback(self):
        if self.traceback is None:
@ -622,8 +629,23 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            gz = gzip.GzipFile(fileobj=io.BytesIO(resp.read()), mode='r')
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
@ -711,3 +733,31 @@ class DateRange(object):
        return self.start <= date <= self.end
    def __str__(self):
        return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
 def platform_name():
    """Return the value of platform.platform() as a compat_str.

    When platform.platform() yields a byte string, it is decoded with the
    encoding reported by preferredencoding().
    """
    raw = platform.platform()
    name = raw.decode(preferredencoding()) if isinstance(raw, bytes) else raw
    assert isinstance(name, compat_str)
    return name
 def bytes_to_intlist(bs):
    """Convert a byte sequence into a list of integer byte values.

    An empty (or otherwise falsy) input yields an empty list. If indexing
    the input already produces ints (Python 3 bytes), the items are copied
    as they are; otherwise each item is converted with ord().
    """
    if bs:
        if isinstance(bs[0], int):  # Python 3: items are already ints
            return list(bs)
        return list(map(ord, bs))
    return []
 def intlist_to_bytes(xs):
    """Convert a sequence of integer byte values into a byte string.

    An empty (or otherwise falsy) input yields b''. On Python 2, where
    chr() returns a byte string, the characters are joined together; on
    Python 3 the integers are handed directly to bytes().
    """
    if xs:
        if not isinstance(chr(0), bytes):  # Python 3
            return bytes(xs)
        return ''.join(map(chr, xs))  # Python 2
    return b''
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,2 +1,2 @@
 __version__ = '2013.08.22'
 __version__ = '2013.09.06.1'