Merge remote-tracking branch 'origin/master'

11 years ago · ef66b0c6ef
--- a/devscripts/youtube_genalgo.py
+++ b/devscripts/youtube_genalgo.py
@ -32,9 +32,9 @@ tests = [
    # 83
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
     ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
    # 82 - vflZK4ZYR 2013/08/23
    # 82 - vflGNjMhJ 2013/09/12
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
     "wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>.<"),
     ".>/?;}[<=+-(*&^%$#@!MNBVCXeASDFGHKLPOqUYTREWQ0987654321mnbvcxzasdfghjklpoiuytrIwZ"),
    # 81 - vflLC8JvQ 2013/07/25
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.",
     "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@ -36,6 +36,7 @@ class TestAllURLsMatching(unittest.TestCase):
        self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
        self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
        self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
        self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])

    def test_youtube_channel_matching(self):
        assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
--- a/test/test_dailymotion_subtitles.py
+++ b/test/test_dailymotion_subtitles.py
@ -40,6 +40,7 @@ class TestDailymotionSubtitles(unittest.TestCase):
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles.keys()), 5)
@ -54,6 +55,7 @@ class TestDailymotionSubtitles(unittest.TestCase):
        self.assertTrue(len(subtitles.keys()) == 0)
    def test_nosubtitles(self):
        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles), 0)
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@ -8,7 +8,7 @@ import json
 import os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

 from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE
 from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE, UstreamChannelIE, SoundcloudUserIE
 from youtube_dl.utils import *

 from helper import FakeYDL
@ -34,5 +34,21 @@ class TestPlaylists(unittest.TestCase):
        self.assertEqual(result['title'], u'Vimeo Tributes')
        self.assertTrue(len(result['entries']) > 24)

    def test_ustream_channel(self):
        dl = FakeYDL()
        ie = UstreamChannelIE(dl)
        result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty')
        self.assertIsPlaylist(result)
        self.assertEqual(result['id'], u'5124905')
        self.assertTrue(len(result['entries']) >= 11)

    def test_soundcloud_user(self):
        dl = FakeYDL()
        ie = SoundcloudUserIE(dl)
        result = ie.extract('https://soundcloud.com/the-concept-band')
        self.assertIsPlaylist(result)
        self.assertEqual(result['id'], u'9615865')
        self.assertTrue(len(result['entries']) >= 12)

 if __name__ == '__main__':
    unittest.main()
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -11,13 +11,16 @@ import os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

 #from youtube_dl.utils import htmlentity_transform
 from youtube_dl.utils import timeconvert
 from youtube_dl.utils import sanitize_filename
 from youtube_dl.utils import unescapeHTML
 from youtube_dl.utils import orderedSet
 from youtube_dl.utils import DateRange
 from youtube_dl.utils import unified_strdate
 from youtube_dl.utils import find_xpath_attr
 from youtube_dl.utils import (
    timeconvert,
    sanitize_filename,
    unescapeHTML,
    orderedSet,
    DateRange,
    unified_strdate,
    find_xpath_attr,
    get_meta_content,
 )

 if sys.version_info < (3, 0):
    _compat_str = lambda b: b.decode('unicode-escape')
@ -127,5 +130,16 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1])
        self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2])

    def test_meta_parser(self):
        testhtml = u'''
        <head>
            <meta name="description" content="foo &amp; bar">
            <meta content='Plato' name='author'/>
        </head>
        '''
        get_meta = lambda name: get_meta_content(name, testhtml)
        self.assertEqual(get_meta('description'), u'foo & bar')
        self.assertEqual(get_meta('author'), 'Plato')

 if __name__ == '__main__':
    unittest.main()
--- a/test/test_youtube_subtitles.py
+++ b/test/test_youtube_subtitles.py
@ -41,6 +41,7 @@ class TestYoutubeSubtitles(unittest.TestCase):
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
    def test_youtube_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles.keys()), 13)
@ -66,6 +67,7 @@ class TestYoutubeSubtitles(unittest.TestCase):
        self.assertTrue(subtitles['it'] is not None)
    def test_youtube_nosubtitles(self):
        self.url = 'sAjKT8FhjI8'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles), 0)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@ -74,6 +74,7 @@ class YoutubeDL(object):
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs:    List of languages of the subtitles to download
@ -492,13 +493,14 @@ class YoutubeDL(object):
                self.report_writedescription(descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (KeyError, TypeError):
                self.report_warning(u'There\'s no description to write.')
            except (OSError, IOError):
                self.report_error(u'Cannot write description file ' + descfn)
                return

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub'),
                                       self.params.get('allsubtitles', False)])
                                       self.params.get('writeautomaticsub')])

        if  subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@ -533,6 +533,11 @@ def _real_main(argv=None):
    else:
        date = DateRange(opts.dateafter, opts.datebefore)

    # --all-sub automatically sets --write-sub if --write-auto-sub is not given
    # this was the old behaviour if only --all-sub was given.
    if opts.allsubtitles and (opts.writeautomaticsub == False):
        opts.writesubtitles = True

    if sys.version_info < (3,):
        # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
        if opts.outtmpl is not None:
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -52,6 +52,7 @@ from .jeuxvideo import JeuxVideoIE
 from .jukebox import JukeboxIE
 from .justintv import JustinTVIE
 from .kankan import KankanIE
 from .kickstarter import KickStarterIE
 from .keek import KeekIE
 from .liveleak import LiveLeakIE
 from .livestream import LivestreamIE
@ -81,7 +82,8 @@ from .sina import SinaIE
 from .slashdot import SlashdotIE
 from .slideshare import SlideshareIE
 from .sohu import SohuIE
 from .soundcloud import SoundcloudIE, SoundcloudSetIE
 from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
 from .southparkstudios import SouthParkStudiosIE
 from .spiegel import SpiegelIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .statigram import StatigramIE
@ -96,7 +98,7 @@ from .tudou import TudouIE
 from .tumblr import TumblrIE
 from .tutv import TutvIE
 from .unistra import UnistraIE
 from .ustream import UstreamIE
 from .ustream import UstreamIE, UstreamChannelIE
 from .vbox7 import Vbox7IE
 from .veehd import VeeHDIE
 from .veoh import VeohIE
--- a/youtube_dl/extractor/archiveorg.py
+++ b/youtube_dl/extractor/archiveorg.py
@ -46,6 +46,8 @@ class ArchiveOrgIE(InfoExtractor):
            for fn,fdata in data['files'].items()
            if 'Video' in fdata['format']]
        formats.sort(key=lambda fdata: fdata['file_size'])
        for f in formats:
            f['ext'] = determine_ext(f['url'])

        info = {
            '_type': 'video',
@ -61,7 +63,6 @@ class ArchiveOrgIE(InfoExtractor):
            info['thumbnail'] = thumbnail

        # TODO: Remove when #980 has been merged
        info['url'] = formats[-1]['url']
        info['ext'] = determine_ext(formats[-1]['url'])
        info.update(formats[-1])

        return info
        return info
--- a/youtube_dl/extractor/canalplus.py
+++ b/youtube_dl/extractor/canalplus.py
@ -1,3 +1,4 @@
 # encoding: utf-8
 import re
 import xml.etree.ElementTree

@ -5,24 +6,29 @@ from .common import InfoExtractor
 from ..utils import unified_strdate

 class CanalplusIE(InfoExtractor):
    _VALID_URL = r'https?://(www\.canalplus\.fr/.*?\?vid=|player\.canalplus\.fr/#/)(?P<id>\d+)'
    _VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))'
    _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
    IE_NAME = u'canalplus.fr'

    _TEST = {
        u'url': u'http://www.canalplus.fr/c-divertissement/pid3351-c-le-petit-journal.html?vid=889861',
        u'file': u'889861.flv',
        u'md5': u'590a888158b5f0d6832f84001fbf3e99',
        u'url': u'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470',
        u'file': u'922470.flv',
        u'info_dict': {
            u'title': u'Le Petit Journal 20/06/13 - La guerre des drone',
            u'upload_date': u'20130620',
            u'title': u'Zapping - 26/08/13',
            u'description': u'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013',
            u'upload_date': u'20130826',
        },
        u'params': {
            u'skip_download': True,
        },
        u'skip': u'Requires rtmpdump'
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        if video_id is None:
            webpage = self._download_webpage(url, mobj.group('path'))
            video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
        info_url = self._VIDEO_INFO_TEMPLATE % video_id
        info_page = self._download_webpage(info_url,video_id, 
                                           u'Downloading video info')
@ -43,4 +49,6 @@ class CanalplusIE(InfoExtractor):
                'ext': 'flv',
                'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text),
                'thumbnail': media.find('IMAGES/GRAND').text,
                'description': infos.find('DESCRIPTION').text,
                'view_count': int(infos.find('NB_VUES').text),
                }
--- a/youtube_dl/extractor/dreisat.py
+++ b/youtube_dl/extractor/dreisat.py
@ -54,6 +54,7 @@ class DreiSatIE(InfoExtractor):
            'width': int(fe.find('./width').text),
            'height': int(fe.find('./height').text),
            'url': fe.find('./url').text,
            'ext': determine_ext(fe.find('./url').text),
            'filesize': int(fe.find('./filesize').text),
            'video_bitrate': int(fe.find('./videoBitrate').text),
            '3sat_qualityname': fe.find('./quality').text,
@ -79,7 +80,6 @@ class DreiSatIE(InfoExtractor):
        }

        # TODO: Remove when #980 has been merged
        info['url'] = formats[-1]['url']
        info['ext'] = determine_ext(formats[-1]['url'])
        info.update(formats[-1])

        return info
        return info
--- a/youtube_dl/extractor/funnyordie.py
+++ b/youtube_dl/extractor/funnyordie.py
@ -21,7 +21,7 @@ class FunnyOrDieIE(InfoExtractor):
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        video_url = self._search_regex(r'type: "video/mp4", src: "(.*?)"',
        video_url = self._search_regex(r'type="video/mp4" src="(.*?)"',
            webpage, u'video URL', flags=re.DOTALL)

        info = {
--- a/youtube_dl/extractor/gamespot.py
+++ b/youtube_dl/extractor/gamespot.py
@ -14,7 +14,7 @@ class GameSpotIE(InfoExtractor):
        u"file": u"6410818.mp4",
        u"md5": u"b2a30deaa8654fcccd43713a6b6a4825",
        u"info_dict": {
            u"title": u"Arma III - Community Guide: SITREP I",
            u"title": u"Arma 3 - Community Guide: SITREP I",
            u"upload_date": u"20130627", 
        }
    }
--- a/youtube_dl/extractor/googleplus.py
+++ b/youtube_dl/extractor/googleplus.py
@ -40,7 +40,8 @@ class GooglePlusIE(InfoExtractor):
        self.report_extraction(video_id)

        # Extract update date
        upload_date = self._html_search_regex('title="Timestamp">(.*?)</a>',
        upload_date = self._html_search_regex(
            ['title="Timestamp">(.*?)</a>', r'<a.+?class="g-M.+?>(.+?)</a>'],
            webpage, u'upload date', fatal=False)
        if upload_date:
            # Convert timestring to a format suitable for filename
--- a/youtube_dl/extractor/kickstarter.py
+++ b/youtube_dl/extractor/kickstarter.py
@ -0,0 +1,37 @@
 import re

 from .common import InfoExtractor


 class KickStarterIE(InfoExtractor):
    _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>\d*)/.*'
    _TEST = {
        u"url": u"https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location",
        u"file": u"1404461844.mp4",
        u"md5": u"c81addca81327ffa66c642b5d8b08cab",
        u"info_dict": {
            u"title": u"Intersection: The Story of Josh Grant by Kyle Cowling",
        },
    }

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')
        webpage_src = self._download_webpage(url, video_id)

        video_url = self._search_regex(r'data-video="(.*?)">',
            webpage_src, u'video URL')
        if 'mp4' in video_url:
            ext = 'mp4'
        else:
            ext = 'flv'
        video_title = self._html_search_regex(r"<title>(.*?)</title>",
            webpage_src, u'title').rpartition(u'\u2014 Kickstarter')[0].strip()

        results = [{
                    'id': video_id,
                    'url': video_url,
                    'title': video_title,
                    'ext': ext,
                    }]
        return results
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@ -5,34 +5,27 @@ import socket
 from .common import InfoExtractor
 from ..utils import (
    compat_http_client,
    compat_str,
    compat_urllib_error,
    compat_urllib_request,

    ExtractorError,
    unified_strdate,
 )


 class MixcloudIE(InfoExtractor):
    _WORKING = False # New API, but it seems good http://www.mixcloud.com/developers/documentation/
    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
    IE_NAME = u'mixcloud'

    def report_download_json(self, file_id):
        """Report JSON download."""
        self.to_screen(u'Downloading json')

    def get_urls(self, jsonData, fmt, bitrate='best'):
        """Get urls from 'audio_formats' section in json"""
        try:
            bitrate_list = jsonData[fmt]
            if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list:
                bitrate = max(bitrate_list) # select highest

            url_list = jsonData[fmt][bitrate]
        except TypeError: # we have no bitrate info.
            url_list = jsonData[fmt]
        return url_list
    _TEST = {
        u'url': u'http://www.mixcloud.com/dholbach/cryptkeeper/',
        u'file': u'dholbach-cryptkeeper.mp3',
        u'info_dict': {
            u'title': u'Cryptkeeper',
            u'description': u'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
            u'uploader': u'Daniel Holbach',
            u'uploader_id': u'dholbach',
            u'upload_date': u'20111115',
        },
    }

    def check_urls(self, url_list):
        """Returns 1st active url from list"""
@ -45,71 +38,32 @@ class MixcloudIE(InfoExtractor):

        return None

    def _print_formats(self, formats):
        print('Available formats:')
        for fmt in formats.keys():
            for b in formats[fmt]:
                try:
                    ext = formats[fmt][b][0]
                    print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1]))
                except TypeError: # we have no bitrate info
                    ext = formats[fmt][0]
                    print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1]))
                    break

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        # extract uploader & filename from url
        uploader = mobj.group(1).decode('utf-8')
        file_id = uploader + "-" + mobj.group(2).decode('utf-8')

        # construct API request
        file_url = 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url.split('/')[-3:-1]) + '.json'
        # retrieve .json file with links to files
        request = compat_urllib_request.Request(file_url)
        try:
            self.report_download_json(file_url)
            jsonData = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to retrieve file: %s' % compat_str(err))

        # parse JSON
        json_data = json.loads(jsonData)
        player_url = json_data['player_swf_url']
        formats = dict(json_data['audio_formats'])

        req_format = self._downloader.params.get('format', None)

        if self._downloader.params.get('listformats', None):
            self._print_formats(formats)
            return

        if req_format is None or req_format == 'best':
            for format_param in formats.keys():
                url_list = self.get_urls(formats, format_param)
                # check urls
                file_url = self.check_urls(url_list)
                if file_url is not None:
                    break # got it!
        else:
            if req_format not in formats:
                raise ExtractorError(u'Format is not available')

            url_list = self.get_urls(formats, req_format)
            file_url = self.check_urls(url_list)
            format_param = req_format

        return [{
            'id': file_id.decode('utf-8'),
            'url': file_url.decode('utf-8'),
            'uploader': uploader.decode('utf-8'),
            'upload_date': None,
            'title': json_data['name'],
            'ext': file_url.split('.')[-1].decode('utf-8'),
            'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
            'thumbnail': json_data['thumbnail_url'],
            'description': json_data['description'],
            'player_url': player_url.decode('utf-8'),
        }]
        uploader = mobj.group(1)
        cloudcast_name = mobj.group(2)
        track_id = '-'.join((uploader, cloudcast_name))
        api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name)
        webpage = self._download_webpage(url, track_id)
        json_data = self._download_webpage(api_url, track_id,
            u'Downloading cloudcast info')
        info = json.loads(json_data)

        preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url')
        song_url = preview_url.replace('/previews/', '/cloudcasts/originals/')
        template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
        final_song_url = self.check_urls(template_url % i for i in range(30))

        return {
            'id': track_id,
            'title': info['name'],
            'url': final_song_url,
            'ext': 'mp3',
            'description': info['description'],
            'thumbnail': info['pictures'].get('extra_large'),
            'uploader': info['user']['name'],
            'uploader_id': info['user']['username'],
            'upload_date': unified_strdate(info['created_time']),
            'view_count': info['play_count'],
        }
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@ -1,10 +1,12 @@
 import json
 import re
 import itertools

 from .common import InfoExtractor
 from ..utils import (
    compat_str,
    compat_urlparse,
    compat_urllib_parse,

    ExtractorError,
    unified_strdate,
@ -53,10 +55,11 @@ class SoundcloudIE(InfoExtractor):
    def _resolv_url(cls, url):
        return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID

    def _extract_info_dict(self, info, full_title=None):
    def _extract_info_dict(self, info, full_title=None, quiet=False):
        video_id = info['id']
        name = full_title or video_id
        self.report_extraction(name)
        if quiet == False:
            self.report_extraction(name)

        thumbnail = info['artwork_url']
        if thumbnail is not None:
@ -198,3 +201,41 @@ class SoundcloudSetIE(SoundcloudIE):
                'id': info['id'],
                'title': info['title'],
                }


 class SoundcloudUserIE(SoundcloudIE):
    _VALID_URL = r'https?://(www\.)?soundcloud.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$'
    IE_NAME = u'soundcloud:user'

    # it's in tests/test_playlists.py
    _TEST = None

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        uploader = mobj.group('user')

        url = 'http://soundcloud.com/%s/' % uploader
        resolv_url = self._resolv_url(url)
        user_json = self._download_webpage(resolv_url, uploader,
            u'Downloading user info')
        user = json.loads(user_json)

        tracks = []
        for i in itertools.count():
            data = compat_urllib_parse.urlencode({'offset': i*50,
                                                  'client_id': self._CLIENT_ID,
                                                  })
            tracks_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % user['id'] + data
            response = self._download_webpage(tracks_url, uploader, 
                u'Downloading tracks page %s' % (i+1))
            new_tracks = json.loads(response)
            tracks.extend(self._extract_info_dict(track, quiet=True) for track in new_tracks)
            if len(new_tracks) < 50:
                break

        return {
            '_type': 'playlist',
            'id': compat_str(user['id']),
            'title': user['username'],
            'entries': tracks,
        }
--- a/youtube_dl/extractor/southparkstudios.py
+++ b/youtube_dl/extractor/southparkstudios.py
@ -0,0 +1,34 @@
 import re

 from .mtv import MTVIE, _media_xml_tag


 class SouthParkStudiosIE(MTVIE):
    IE_NAME = u'southparkstudios.com'
    _VALID_URL = r'https?://www\.southparkstudios\.com/clips/(?P<id>\d+)'

    _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'

    _TEST = {
        u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
        u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
        u'info_dict': {
            u'title': u'Bat Daded',
            u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.',
        },
    }

    # Overwrite MTVIE properties we don't want
    _TESTS = []

    def _get_thumbnail_url(self, uri, itemdoc):
        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
        return itemdoc.find(search_path).attrib['url']

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"',
                                  webpage, u'mgid')
        return self._get_videos_info(mgid)
--- a/youtube_dl/extractor/subtitles.py
+++ b/youtube_dl/extractor/subtitles.py
@ -10,8 +10,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
    @property
    def _have_to_download_any_subtitles(self):
        return any([self._downloader.params.get('writesubtitles', False),
                    self._downloader.params.get('writeautomaticsub'),
                    self._downloader.params.get('allsubtitles', False)])
                    self._downloader.params.get('writeautomaticsub')])

    def _list_available_subtitles(self, video_id, webpage=None):
        """ outputs the available subtitles for the video """
@ -34,7 +33,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
        available_subs_list = {}
        if self._downloader.params.get('writeautomaticsub', False):
            available_subs_list.update(self._get_available_automatic_caption(video_id, video_webpage))
        if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
        if self._downloader.params.get('writesubtitles', False):
            available_subs_list.update(self._get_available_subtitles(video_id))

        if not available_subs_list:  # error, it didn't get the available subtitles
--- a/youtube_dl/extractor/trilulilu.py
+++ b/youtube_dl/extractor/trilulilu.py
@ -52,6 +52,7 @@ class TriluliluIE(InfoExtractor):
            {
                'format': fnode.text,
                'url': video_url_template % fnode.text,
                'ext': fnode.text.partition('-')[0]
            }

            for fnode in format_doc.findall('./formats/format')
@ -67,7 +68,6 @@ class TriluliluIE(InfoExtractor):
        }

        # TODO: Remove when #980 has been merged
        info['url'] = formats[-1]['url']
        info['ext'] = formats[-1]['format'].partition('-')[0]
        info.update(formats[-1])

        return info
--- a/youtube_dl/extractor/ustream.py
+++ b/youtube_dl/extractor/ustream.py
@ -1,6 +1,11 @@
 import json
 import re

 from .common import InfoExtractor
 from ..utils import (
    compat_urlparse,
    get_meta_content,
 )


 class UstreamIE(InfoExtractor):
@ -43,3 +48,25 @@ class UstreamIE(InfoExtractor):
                'thumbnail': thumbnail,
               }
        return info

 class UstreamChannelIE(InfoExtractor):
    _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
    IE_NAME = u'ustream:channel'

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        slug = m.group('slug')
        webpage = self._download_webpage(url, slug)
        channel_id = get_meta_content('ustream:channel_id', webpage)

        BASE = 'http://www.ustream.tv'
        next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id
        video_ids = []
        while next_url:
            reply = json.loads(self._download_webpage(compat_urlparse.urljoin(BASE, next_url), channel_id))
            video_ids.extend(re.findall(r'data-content-id="(\d.*)"', reply['data']))
            next_url = reply['nextUrl']

        urls = ['http://www.ustream.tv/recorded/' + vid for vid in video_ids]
        url_entries = [self.url_result(eurl, 'Ustream') for eurl in urls]
        return self.playlist_result(url_entries, channel_id)
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@ -27,7 +27,7 @@ class XHamsterIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)

        video_id = mobj.group('id')
        mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id
        mrss_url = 'http://xhamster.com/movies/%s/.html?hd' % video_id
        webpage = self._download_webpage(mrss_url, video_id)

        mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -139,7 +139,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                     (
                         (?:https?://)?                                       # http(s):// (optional)
                         (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
                            tube\.majestyc\.net/)                             # the various hostnames, with wildcard subdomains
                            tube\.majestyc\.net/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
@ -434,7 +435,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        elif len(s) == 83:
            return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
        elif len(s) == 82:
            return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82]
            return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54]
        elif len(s) == 81:
            return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
        elif len(s) == 80:
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -249,7 +249,17 @@ def htmlentity_transform(matchobj):
    return (u'&%s;' % entity)

 compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
 class AttrParser(compat_html_parser.HTMLParser):
 class BaseHTMLParser(compat_html_parser.HTMLParser):
    def __init(self):
        compat_html_parser.HTMLParser.__init__(self)
        self.html = None

    def loads(self, html):
        self.html = html
        self.feed(html)
        self.close()

 class AttrParser(BaseHTMLParser):
    """Modified HTMLParser that isolates a tag with the specified attribute"""
    def __init__(self, attribute, value):
        self.attribute = attribute
@ -257,10 +267,9 @@ class AttrParser(compat_html_parser.HTMLParser):
        self.result = None
        self.started = False
        self.depth = {}
        self.html = None
        self.watch_startpos = False
        self.error_count = 0
        compat_html_parser.HTMLParser.__init__(self)
        BaseHTMLParser.__init__(self)

    def error(self, message):
        if self.error_count > 10 or self.started:
@ -269,11 +278,6 @@ class AttrParser(compat_html_parser.HTMLParser):
        self.error_count += 1
        self.goahead(1)

    def loads(self, html):
        self.html = html
        self.feed(html)
        self.close()

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if self.started:
@ -334,6 +338,38 @@ def get_element_by_attribute(attribute, value, html):
        pass
    return parser.get_result()

 class MetaParser(BaseHTMLParser):
    """
    Modified HTMLParser that isolates a meta tag with the specified name 
    attribute.
    """
    def __init__(self, name):
        BaseHTMLParser.__init__(self)
        self.name = name
        self.content = None
        self.result = None

    def handle_starttag(self, tag, attrs):
        if tag != 'meta':
            return
        attrs = dict(attrs)
        if attrs.get('name') == self.name:
            self.result = attrs.get('content')

    def get_result(self):
        return self.result

 def get_meta_content(name, html):
    """
    Return the content attribute from the meta tag with the given name attribute.
    """
    parser = MetaParser(name)
    try:
        parser.loads(html)
    except compat_html_parser.HTMLParseError:
        pass
    return parser.get_result()


 def clean_html(html):
    """Clean an HTML snippet into a readable string"""
@ -664,7 +700,16 @@ def unified_strdate(date_str):
    date_str = date_str.replace(',',' ')
    # %z (UTC offset) is only supported in python>=3.2
    date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
    format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S', '%d.%m.%Y %H:%M']
    format_expressions = [
        '%d %B %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%Y-%m-%d',
        '%d/%m/%Y',
        '%Y/%m/%d %H:%M:%S',
        '%d.%m.%Y %H:%M',
        '%Y-%m-%dT%H:%M:%SZ',
    ]
    for expression in format_expressions:
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')