Merge remote-tracking branch 'origin/master' into pr-bbcnews

10 years ago · aa5740fb61
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -4,7 +4,10 @@ from .abc import ABCIE
 from .abc7news import Abc7NewsIE
 from .academicearth import AcademicEarthCourseIE
 from .addanime import AddAnimeIE
 from .adobetv import AdobeTVIE
 from .adobetv import (
    AdobeTVIE,
    AdobeTVVideoIE,
 )
 from .adultswim import AdultSwimIE
 from .aftenposten import AftenpostenIE
 from .aftonbladet import AftonbladetIE
@ -103,6 +106,7 @@ from .dailymotion import (
    DailymotionIE,
    DailymotionPlaylistIE,
    DailymotionUserIE,
    DailymotionCloudIE,
 )
 from .daum import DaumIE
 from .dbtv import DBTVIE
@ -401,6 +405,7 @@ from .pbs import PBSIE
 from .philharmoniedeparis import PhilharmonieDeParisIE
 from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
 from .pinkbike import PinkbikeIE
 from .planetaplay import PlanetaPlayIE
 from .pladform import PladformIE
 from .played import PlayedIE
@ -696,7 +701,10 @@ from .wrzuta import WrzutaIE
 from .wsj import WSJIE
 from .xbef import XBefIE
 from .xboxclips import XboxClipsIE
 from .xhamster import XHamsterIE
 from .xhamster import (
    XHamsterIE,
    XHamsterEmbedIE,
 )
 from .xminus import XMinusIE
 from .xnxx import XNXXIE
 from .xstream import XstreamIE
--- a/youtube_dl/extractor/adobetv.py
+++ b/youtube_dl/extractor/adobetv.py
@ -5,6 +5,8 @@ from ..utils import (
    parse_duration,
    unified_strdate,
    str_to_int,
    float_or_none,
    ISO639Utils,
 )
@ -69,3 +71,61 @@ class AdobeTVIE(InfoExtractor):
            'view_count': view_count,
            'formats': formats,
        }
 class AdobeTVVideoIE(InfoExtractor):
    """Extractor for player pages at ``video.tv.adobe.com/v/<id>``."""
    _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
    _TEST = {
        # From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners
        'url': 'https://video.tv.adobe.com/v/2456/',
        'md5': '43662b577c018ad707a63766462b1e87',
        'info_dict': {
            'id': '2456',
            'ext': 'mp4',
            'title': 'New experience with Acrobat DC',
            'description': 'New experience with Acrobat DC',
            'duration': 248.667,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the video at ``url``.

        The page embeds its player configuration as ``var bridge = {...};``.
        Formats, duration, title and subtitles are read from that JSON; the
        description comes from the page's OpenGraph metadata.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        player_params = self._parse_json(self._search_regex(
            r'var\s+bridge\s*=\s*([^;]+);', webpage, 'player parameters'),
            video_id)
        sources = player_params['sources']
        formats = [{
            'url': source['src'],
            'width': source.get('width'),
            'height': source.get('height'),
            'tbr': source.get('bitrate'),
        } for source in sources]
        # For both metadata and downloaded files the duration varies among
        # formats, so the largest one is used. Missing or zero values are
        # dropped first; if nothing is left the duration is reported as None
        # instead of letting max() raise ValueError on an empty sequence.
        # NOTE(review): scale=1000 presumably converts milliseconds to
        # seconds - confirm against float_or_none.
        durations = [
            value for value in (
                float_or_none(source.get('duration'), scale=1000)
                for source in sources)
            if value]
        duration = max(durations) if durations else None
        subtitles = {}
        for translation in player_params.get('translations', []):
            # Prefer the 'language_w3c' code; otherwise derive a code from
            # 'language_medium' via ISO639Utils.long2short.
            lang_id = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])
            subtitles.setdefault(lang_id, []).append({
                'url': translation['vttPath'],
                'ext': 'vtt',
            })
        return {
            'id': video_id,
            'formats': formats,
            'title': player_params['title'],
            'description': self._og_search_description(webpage),
            'duration': duration,
            'subtitles': subtitles,
        }
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@ -255,26 +255,11 @@ class BBCCoUkIE(InfoExtractor):
        for connection in self._extract_connections(media):
            captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
            lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
            ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
            srt = ''
            def _extract_text(p):
                if p.text is not None:
                    stripped_text = p.text.strip()
                    if stripped_text:
                        return stripped_text
                return ' '.join(span.text.strip() for span in p.findall('{http://www.w3.org/2006/10/ttaf1}span'))
            for pos, p in enumerate(ps):
                srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), _extract_text(p))
            subtitles[lang] = [
                {
                    'url': connection.get('href'),
                    'ext': 'ttml',
                },
                {
                    'data': srt,
                    'ext': 'srt',
                },
            ]
        return subtitles
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@ -13,6 +13,7 @@ from ..compat import (
    compat_urllib_parse_urlparse,
    compat_urllib_request,
    compat_urlparse,
    compat_xml_parse_error,
 )
 from ..utils import (
    determine_ext,
@ -119,7 +120,7 @@ class BrightcoveIE(InfoExtractor):
        try:
            object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
        except xml.etree.ElementTree.ParseError:
        except compat_xml_parse_error:
            return
        fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@ -251,3 +251,45 @@ class DailymotionUserIE(DailymotionPlaylistIE):
            'title': full_user,
            'entries': self._extract_entries(user),
        }
 class DailymotionCloudIE(DailymotionBaseInfoExtractor):
    """Extractor for Dailymotion Cloud embeds at ``api.dmcloud.net/embed/``."""
    _VALID_URL = r'http://api\.dmcloud\.net/embed/[^/]+/(?P<id>[^/?]+)'
    _TEST = {
        # From http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html
        # Tested at FranceTvInfo_2
        'url': 'http://api.dmcloud.net/embed/4e7343f894a6f677b10006b4/556e03339473995ee145930c?auth=1464865870-0-jyhsm84b-ead4c701fb750cf9367bf4447167a3db&autoplay=1',
        'only_matching': True,
    }

    @classmethod
    def _extract_dmcloud_url(cls, webpage):
        """Return the first Dailymotion Cloud embed URL found in ``webpage``.

        An ``<iframe src=...>`` embed is looked for first, then an
        ``<input id="dmcloudUrlEmissionSelect" value=...>`` element.
        Returns None when neither is present.
        """
        embed_patterns = (
            r'<iframe[^>]+src=[\'"](http://api\.dmcloud\.net/embed/[^/]+/[^\'"]+)[\'"]',
            r'<input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=[\'"](http://api\.dmcloud\.net/embed/[^/]+/[^\'"]+)[\'"]',
        )
        for pattern in embed_patterns:
            mobj = re.search(pattern, webpage)
            if mobj:
                return mobj.group(1)
        return None

    def _real_extract(self, url):
        """Build the info dict for the embed page at ``url``.

        The title is taken from the page's ``<title>`` element; the media URL
        and thumbnail come from the ``var info = {...};`` JSON in the page.
        """
        video_id = self._match_id(url)
        request = self._build_request(url)
        webpage = self._download_webpage(request, video_id)
        title = self._html_search_regex(r'<title>([^>]+)</title>', webpage, 'title')
        video_info = self._parse_json(self._search_regex(
            r'var\s+info\s*=\s*([^;]+);', webpage, 'video info'), video_id)
        # TODO: parse ios_url, which is in fact a manifest
        video_url = video_info['mp4_url']
        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'thumbnail': video_info.get('thumbnail_url'),
        }
--- a/youtube_dl/extractor/dramafever.py
+++ b/youtube_dl/extractor/dramafever.py
@ -6,6 +6,8 @@ import itertools
 from .common import InfoExtractor
 from ..compat import (
    compat_HTTPError,
    compat_urllib_parse,
    compat_urllib_request,
    compat_urlparse,
 )
 from ..utils import (
@ -17,7 +19,39 @@ from ..utils import (
 )
 class DramaFeverIE(InfoExtractor):
 class DramaFeverBaseIE(InfoExtractor):
    """Shared base for the DramaFever extractors; logs in on initialization."""
    _LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
    _NETRC_MACHINE = 'dramafever'

    def _real_initialize(self):
        self._login()

    def _login(self):
        """Post the login form to ``_LOGIN_URL`` when a username is available.

        Does nothing if ``_get_login_info`` yields no username. Raises
        ExtractorError when the response contains neither logout marker.
        """
        username, password = self._get_login_info()
        if username is None:
            return
        payload = compat_urllib_parse.urlencode({
            'username': username,
            'password': password,
        }).encode('utf-8')
        login_request = compat_urllib_request.Request(self._LOGIN_URL, payload)
        login_page = self._download_webpage(
            login_request, None, 'Logging in as %s' % username)
        # Either logout marker in the response counts as a successful login.
        logged_in = any(
            marker in login_page
            for marker in ('href="/accounts/logout/"', '>Log out<'))
        if logged_in:
            return
        # Surface the site's own error text when the page provides one.
        error = self._html_search_regex(
            r'(?s)class="hidden-xs prompt"[^>]*>(.+?)<',
            login_page, 'error message', default=None)
        if error:
            raise ExtractorError('Unable to login: %s' % error, expected=True)
        raise ExtractorError('Unable to log in')
 class DramaFeverIE(DramaFeverBaseIE):
    IE_NAME = 'dramafever'
    _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
    _TEST = {
@ -97,7 +131,7 @@ class DramaFeverIE(InfoExtractor):
        }
 class DramaFeverSeriesIE(InfoExtractor):
 class DramaFeverSeriesIE(DramaFeverBaseIE):
    IE_NAME = 'dramafever:series'
    _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$'
    _TESTS = [{
@ -151,8 +185,11 @@ class DramaFeverSeriesIE(InfoExtractor):
                % (consumer_secret, series_id, self._PAGE_SIZE, page_num),
                series_id, 'Downloading episodes JSON page #%d' % page_num)
            for episode in episodes.get('value', []):
                episode_url = episode.get('episode_url')
                if not episode_url:
                    continue
                entries.append(self.url_result(
                    compat_urlparse.urljoin(url, episode['episode_url']),
                    compat_urlparse.urljoin(url, episode_url),
                    'DramaFever', episode.get('guid')))
            if page_num == episodes['num_pages']:
                break
--- a/youtube_dl/extractor/drbonanza.py
+++ b/youtube_dl/extractor/drbonanza.py
@ -15,7 +15,6 @@ class DRBonanzaIE(InfoExtractor):
    _TESTS = [{
        'url': 'http://www.dr.dk/bonanza/serie/portraetter/Talkshowet.htm?assetId=65517',
        'md5': 'fe330252ddea607635cf2eb2c99a0af3',
        'info_dict': {
            'id': '65517',
            'ext': 'mp4',
@ -26,6 +25,9 @@ class DRBonanzaIE(InfoExtractor):
            'upload_date': '20110120',
            'duration': 3664,
        },
        'params': {
            'skip_download': True,  # requires rtmp
        },
    }, {
        'url': 'http://www.dr.dk/bonanza/radio/serie/sport/fodbold.htm?assetId=59410',
        'md5': '6dfe039417e76795fb783c52da3de11d',
@ -93,6 +95,11 @@ class DRBonanzaIE(InfoExtractor):
                        'format_id': file['Type'].replace('Video', ''),
                        'preference': preferencemap.get(file['Type'], -10),
                    })
                    if format['url'].startswith('rtmp'):
                        rtmp_url = format['url']
                        format['rtmp_live'] = True  # --resume does not work
                        if '/bonanza/' in rtmp_url:
                            format['play_path'] = rtmp_url.split('/bonanza/')[1]
                    formats.append(format)
                elif file['Type'] == "Thumb":
                    thumbnail = file['Location']
@ -111,9 +118,6 @@ class DRBonanzaIE(InfoExtractor):
        description = '%s\n%s\n%s\n' % (
            info['Description'], info['Actors'], info['Colophon'])
        for f in formats:
            f['url'] = f['url'].replace('rtmp://vod-bonanza.gss.dr.dk/bonanza/', 'http://vodfiles.dr.dk/')
            f['url'] = f['url'].replace('mp4:bonanza', 'bonanza')
        self._sort_formats(formats)
        display_id = re.sub(r'[^\w\d-]', '', re.sub(r' ', '-', title.lower())) + '-' + asset_id
--- a/youtube_dl/extractor/faz.py
+++ b/youtube_dl/extractor/faz.py
@ -6,9 +6,9 @@ from .common import InfoExtractor
 class FazIE(InfoExtractor):
    IE_NAME = 'faz.net'
    _VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+)\.html'
    _VALID_URL = r'https?://(?:www\.)?faz\.net/(?:[^/]+/)*.*?-(?P<id>\d+)\.html'
    _TEST = {
    _TESTS = [{
        'url': 'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
        'info_dict': {
            'id': '12610585',
@ -16,7 +16,22 @@ class FazIE(InfoExtractor):
            'title': 'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher',
            'description': 'md5:1453fbf9a0d041d985a47306192ea253',
        },
    }
    }, {
        'url': 'http://www.faz.net/aktuell/politik/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html',
        'only_matching': True,
    }, {
        'url': 'http://www.faz.net/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html',
        'only_matching': True,
    }, {
        'url': 'http://www.faz.net/-13659345.html',
        'only_matching': True,
    }, {
        'url': 'http://www.faz.net/aktuell/politik/-13659345.html',
        'only_matching': True,
    }, {
        'url': 'http://www.faz.net/foobarblafasel-13659345.html',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        video_id = self._match_id(url)
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@ -18,6 +18,7 @@ from ..utils import (
    parse_duration,
    determine_ext,
 )
 from .dailymotion import DailymotionCloudIE
 class FranceTVBaseInfoExtractor(InfoExtractor):
@ -131,12 +132,26 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
            'skip_download': 'HLS (reqires ffmpeg)'
        },
        'skip': 'Ce direct est terminé et sera disponible en rattrapage dans quelques minutes.',
    }, {
        'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html',
        'md5': 'f485bda6e185e7d15dbc69b72bae993e',
        'info_dict': {
            'id': '556e03339473995ee145930c',
            'ext': 'mp4',
            'title': 'Les entreprises familiales : le secret de la réussite',
            'thumbnail': 're:^https?://.*\.jpe?g$',
        }
    }]
    def _real_extract(self, url):
        """Extract the video referenced by the page at ``url``.

        A Dailymotion Cloud embed found in the page is delegated to the
        'DailymotionCloud' extractor. Otherwise the ``id-video=<id>@<catalogue>``
        value is split on '@' and handed to ``_extract_video``.
        """
        page_title = re.match(self._VALID_URL, url).group('title')
        webpage = self._download_webpage(url, page_title)

        embed_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
        if embed_url:
            return self.url_result(embed_url, 'DailymotionCloud')

        id_and_catalogue = self._search_regex(
            r'id-video=([^@]+@[^"]+)', webpage, 'video id')
        video_id, catalogue = id_and_catalogue.split('@')
        return self._extract_video(video_id, catalogue)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -43,6 +43,9 @@ from .senateisvp import SenateISVPIE
 from .bliptv import BlipTVIE
 from .svt import SVTIE
 from .pornhub import PornHubIE
 from .xhamster import XHamsterEmbedIE
 from .vimeo import VimeoIE
 from .dailymotion import DailymotionCloudIE
 class GenericIE(InfoExtractor):
@ -333,6 +336,15 @@ class GenericIE(InfoExtractor):
                'skip_download': True,
            },
        },
        # XHamster embed
        {
            'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
            'info_dict': {
                'id': 'showthread',
                'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
            },
            'playlist_mincount': 7,
        },
        # Embedded TED video
        {
            'url': 'http://en.support.wordpress.com/videos/ted-talks/',
@ -812,6 +824,29 @@ class GenericIE(InfoExtractor):
                'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
                'uploader': 'Rogers Sportsnet',
            },
        },
        # Dailymotion Cloud video
        {
            'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
            'md5': '49444254273501a64675a7e68c502681',
            'info_dict': {
                'id': '5585de919473990de4bee11b',
                'ext': 'mp4',
                'title': 'Le débat',
                'thumbnail': 're:^https?://.*\.jpe?g$',
            }
        },
        # AdobeTVVideo embed
        {
            'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
            'md5': '43662b577c018ad707a63766462b1e87',
            'info_dict': {
                'id': '2456',
                'ext': 'mp4',
                'title': 'New experience with Acrobat DC',
                'description': 'New experience with Acrobat DC',
                'duration': 248.667,
            },
        }
    ]
@ -1089,18 +1124,9 @@ class GenericIE(InfoExtractor):
        if matches:
            return _playlist_from_matches(matches, ie='RtlNl')
        # Look for embedded (iframe) Vimeo player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
        if mobj:
            player_url = unescapeHTML(mobj.group('url'))
            surl = smuggle_url(player_url, {'Referer': url})
            return self.url_result(surl)
        # Look for embedded (swf embed) Vimeo player
        mobj = re.search(
            r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
        if mobj:
            return self.url_result(mobj.group(1))
        vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
        if vimeo_url is not None:
            return self.url_result(vimeo_url)
        # Look for embedded YouTube player
        matches = re.findall(r'''(?x)
@ -1327,6 +1353,11 @@ class GenericIE(InfoExtractor):
        if pornhub_url:
            return self.url_result(pornhub_url, 'PornHub')
        # Look for embedded XHamster player
        xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
        if xhamster_urls:
            return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
        # Look for embedded Tvigle player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
@ -1494,6 +1525,20 @@ class GenericIE(InfoExtractor):
        if senate_isvp_url:
            return self.url_result(senate_isvp_url, 'SenateISVP')
        # Look for Dailymotion Cloud videos
        dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
        if dmcloud_url:
            return self.url_result(dmcloud_url, 'DailymotionCloud')
        # Look for AdobeTVVideo embeds
        mobj = re.search(
            r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
            webpage)
        if mobj is not None:
            return self.url_result(
                self._proto_relative_url(unescapeHTML(mobj.group(1))),
                'AdobeTVVideo')
        def check_video(vurl):
            if YoutubeIE.suitable(vurl):
                return True
--- a/youtube_dl/extractor/imdb.py
+++ b/youtube_dl/extractor/imdb.py
@ -46,7 +46,7 @@ class ImdbIE(InfoExtractor):
            format_info = info['videoPlayerObject']['video']
            formats.append({
                'format_id': f_id,
                'url': format_info['url'],
                'url': format_info['videoInfoList'][0]['videoUrl'],
            })
        return {
--- a/youtube_dl/extractor/pinkbike.py
+++ b/youtube_dl/extractor/pinkbike.py
@ -0,0 +1,96 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    int_or_none,
    remove_end,
    remove_start,
    str_to_int,
    unified_strdate,
 )
 class PinkbikeIE(InfoExtractor):
    """Extractor for Pinkbike video pages and ``kvid-y5.swf?id=`` player URLs."""
    _VALID_URL = r'https?://(?:(?:www\.)?pinkbike\.com/video/|es\.pinkbike\.org/i/kvid/kvid-y5\.swf\?id=)(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.pinkbike.com/video/402811/',
        'md5': '4814b8ca7651034cd87e3361d5c2155a',
        'info_dict': {
            'id': '402811',
            'ext': 'mp4',
            'title': 'Brandon Semenuk - RAW 100',
            'description': 'Official release: www.redbull.ca/rupertwalker',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 100,
            'upload_date': '20150406',
            'uploader': 'revelco',
            'location': 'Victoria, British Columbia, Canada',
            'view_count': int,
            'comment_count': int,
        }
    }, {
        'url': 'http://es.pinkbike.org/i/kvid/kvid-y5.swf?id=406629',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the Pinkbike video identified by ``url``."""
        video_id = self._match_id(url)
        # The www.pinkbike.com video page is fetched whichever URL form matched.
        webpage = self._download_webpage(
            'http://www.pinkbike.com/video/%s' % video_id, video_id)

        formats = []
        source_re = r'data-quality=((?:\\)?["\'])(.+?)\1[^>]+src=\1(.+?)\1'
        for source in re.finditer(source_re, webpage):
            quality, src = source.group(2, 3)
            # A label of digits followed by p/P (e.g. "720p") yields the height.
            height = int_or_none(self._search_regex(
                r'^(\d+)[pP]$', quality, 'height', default=None))
            formats.append({
                'url': src,
                'format_id': quality,
                'height': height,
            })
        self._sort_formats(formats)

        title = remove_end(self._og_search_title(webpage), ' Video - Pinkbike')

        # Use the on-page description block when present; otherwise fall back
        # to the OpenGraph description with the leading "<title>. " removed.
        description = self._html_search_regex(
            r'(?s)id="media-description"[^>]*>(.+?)<',
            webpage, 'description', default=None)
        if not description:
            description = remove_start(
                self._og_search_description(webpage), title + '. ')

        thumbnail = self._og_search_thumbnail(webpage)
        duration = int_or_none(self._html_search_meta(
            'video:duration', webpage, 'duration'))

        uploader = self._search_regex(
            r'un:\s*"([^"]+)"', webpage, 'uploader', fatal=False)
        upload_date = unified_strdate(self._search_regex(
            r'class="fullTime"[^>]+title="([^"]+)"',
            webpage, 'upload date', fatal=False))

        location = self._html_search_regex(
            r'(?s)<dt>Location</dt>\s*<dd>(.+?)<',
            webpage, 'location', fatal=False)

        def stat(label):
            # Number shown in the "stat-num" span just before the given label.
            return str_to_int(self._search_regex(
                r'<span[^>]+class="stat-num"[^>]*>([\d,.]+)</span>\s*<span[^>]+class="stat-label"[^>]*>%s' % label,
                webpage, label, fatal=False))

        view_count = stat('Views')
        comment_count = stat('Comments')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'upload_date': upload_date,
            'uploader': uploader,
            'location': location,
            'view_count': view_count,
            'comment_count': comment_count,
            'formats': formats
        }
--- a/youtube_dl/extractor/sohu.py
+++ b/youtube_dl/extractor/sohu.py
@ -6,9 +6,12 @@ import re
 from .common import InfoExtractor
 from ..compat import (
    compat_str,
    compat_urllib_request
    compat_urllib_request,
    compat_urllib_parse,
 )
 from ..utils import (
    ExtractorError,
 )
 from ..utils import ExtractorError
 class SohuIE(InfoExtractor):
@ -26,7 +29,7 @@ class SohuIE(InfoExtractor):
        'skip': 'On available in China',
    }, {
        'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
        'md5': 'ac9a5d322b4bf9ae184d53e4711e4f1a',
        'md5': '699060e75cf58858dd47fb9c03c42cfb',
        'info_dict': {
            'id': '409385080',
            'ext': 'mp4',
@ -34,7 +37,7 @@ class SohuIE(InfoExtractor):
        }
    }, {
        'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
        'md5': '49308ff6dafde5ece51137d04aec311e',
        'md5': '9bf34be48f2f4dadcb226c74127e203c',
        'info_dict': {
            'id': '78693464',
            'ext': 'mp4',
@ -48,7 +51,7 @@ class SohuIE(InfoExtractor):
            'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
        },
        'playlist': [{
            'md5': '492923eac023ba2f13ff69617c32754a',
            'md5': 'bdbfb8f39924725e6589c146bc1883ad',
            'info_dict': {
                'id': '78910339_part1',
                'ext': 'mp4',
@ -56,7 +59,7 @@ class SohuIE(InfoExtractor):
                'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
            }
        }, {
            'md5': 'de604848c0e8e9c4a4dde7e1347c0637',
            'md5': '3e1f46aaeb95354fd10e7fca9fc1804e',
            'info_dict': {
                'id': '78910339_part2',
                'ext': 'mp4',
@ -64,7 +67,7 @@ class SohuIE(InfoExtractor):
                'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
            }
        }, {
            'md5': '93584716ee0657c0b205b8aa3d27aa13',
            'md5': '8407e634175fdac706766481b9443450',
            'info_dict': {
                'id': '78910339_part3',
                'ext': 'mp4',
@ -139,21 +142,42 @@ class SohuIE(InfoExtractor):
        for i in range(part_count):
            formats = []
            for format_id, format_data in formats_json.items():
                allot = format_data['allot']
                data = format_data['data']
                clips_url = data['clipsURL']
                su = data['su']
                # URLs starting with http://newflv.sohu.ccgslb.net/ are not usable,
                # so retry until we get a working URL
                video_url = 'newflv.sohu.ccgslb.net'
                cdnId = None
                retries = 0
                while 'newflv.sohu.ccgslb.net' in video_url and retries < 5:
                    download_note = 'Download information from CDN gateway for format ' + format_id
                while 'newflv.sohu.ccgslb.net' in video_url:
                    params = {
                        'prot': 9,
                        'file': clips_url[i],
                        'new': su[i],
                        'prod': 'flash',
                    }
                    if cdnId is not None:
                        params['idc'] = cdnId
                    download_note = 'Downloading %s video URL part %d of %d' % (
                        format_id, i + 1, part_count)
                    if retries > 0:
                        download_note += ' (retry #%d)' % retries
                    part_info = self._parse_json(self._download_webpage(
                        'http://%s/?%s' % (allot, compat_urllib_parse.urlencode(params)),
                        video_id, download_note), video_id)
                    video_url = part_info['url']
                    cdnId = part_info.get('nid')
                    retries += 1
                    cdn_info = self._download_json(
                        'http://data.vod.itc.cn/cdnList?new=' + data['su'][i],
                        video_id, download_note)
                    video_url = cdn_info['url']
                    if retries > 5:
                        raise ExtractorError('Failed to get video URL')
                formats.append({
                    'url': video_url,
--- a/youtube_dl/extractor/tumblr.py
+++ b/youtube_dl/extractor/tumblr.py
@ -5,6 +5,7 @@ import re
 from .common import InfoExtractor
 from .pornhub import PornHubIE
 from .vimeo import VimeoIE
 class TumblrIE(InfoExtractor):
@ -40,6 +41,17 @@ class TumblrIE(InfoExtractor):
            'timestamp': 1430931613,
        },
        'add_ie': ['Vidme'],
    }, {
        'url': 'http://camdamage.tumblr.com/post/98846056295/',
        'md5': 'a9e0c8371ea1ca306d6554e3fecf50b6',
        'info_dict': {
            'id': '105463834',
            'ext': 'mp4',
            'title': 'Cam Damage-HD 720p',
            'uploader': 'John Moyer',
            'uploader_id': 'user32021558',
        },
        'add_ie': ['Vimeo'],
    }]
    def _real_extract(self, url):
@ -60,6 +72,10 @@ class TumblrIE(InfoExtractor):
        if pornhub_url:
            return self.url_result(pornhub_url, 'PornHub')
        vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
        if vimeo_url:
            return self.url_result(vimeo_url, 'Vimeo')
        iframe_url = self._search_regex(
            r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'',
            webpage, 'iframe url')
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@ -28,11 +28,15 @@ class VikiBaseIE(InfoExtractor):
    _NETRC_MACHINE = 'viki'
    _token = None
    def _prepare_call(self, path, timestamp=None, post_data=None):
        path += '?' if '?' not in path else '&'
        if not timestamp:
            timestamp = int(time.time())
        query = self._API_QUERY_TEMPLATE % (path, self._APP, timestamp)
        if self._token:
            query += '&token=%s' % self._token
        sig = hmac.new(
            self._APP_SECRET.encode('ascii'),
            query.encode('ascii'),
@ -76,10 +80,14 @@ class VikiBaseIE(InfoExtractor):
            'password': password,
        }
        self._call_api(
        login = self._call_api(
            'sessions.json', None,
            'Logging in as %s' % username, post_data=login_form)
        self._token = login.get('token')
        if not self._token:
            self.report_warning('Unable to get session token, login has probably failed')
 class VikiIE(VikiBaseIE):
    IE_NAME = 'viki'
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@ -22,6 +22,7 @@ from ..utils import (
    unified_strdate,
    unsmuggle_url,
    urlencode_postdata,
    unescapeHTML,
 )
@ -173,6 +174,21 @@ class VimeoIE(VimeoBaseInfoExtractor):
        },
    ]
    @staticmethod
    def _extract_vimeo_url(url, webpage):
        # Look for embedded (iframe) Vimeo player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
        if mobj:
            player_url = unescapeHTML(mobj.group('url'))
            surl = smuggle_url(player_url, {'Referer': url})
            return surl
        # Look for embedded (swf embed) Vimeo player
        mobj = re.search(
            r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
        if mobj:
            return mobj.group(1)
    def _verify_video_password(self, url, video_id, webpage):
        password = self._downloader.params.get('videopassword', None)
        if password is None:
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@ -13,7 +13,6 @@ from ..utils import (
 class XHamsterIE(InfoExtractor):
    """Information Extractor for xHamster"""
    _VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
    _TESTS = [
        {
@ -133,3 +132,36 @@ class XHamsterIE(InfoExtractor):
            'age_limit': age_limit,
            'formats': formats,
        }
 class XHamsterEmbedIE(InfoExtractor):
    """Extractor for ``xhamster.com/xembed.php?video=<id>`` embed pages."""
    _VALID_URL = r'https?://(?:www\.)?xhamster\.com/xembed\.php\?video=(?P<id>\d+)'
    _TEST = {
        'url': 'http://xhamster.com/xembed.php?video=3328539',
        'info_dict': {
            'id': '3328539',
            'ext': 'mp4',
            'title': 'Pen Masturbation',
            'upload_date': '20140728',
            'uploader_id': 'anonymous',
            'duration': 5,
            'age_limit': 18,
        }
    }

    @staticmethod
    def _extract_urls(webpage):
        """Return every xembed.php iframe URL in ``webpage``, in page order."""
        embed_re = r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?xhamster\.com/xembed\.php\?video=\d+)\1'
        return [found.group('url') for found in re.finditer(embed_re, webpage)]

    def _real_extract(self, url):
        """Find the movie page link in the embed page and hand it to 'XHamster'."""
        video_id = self._match_id(url)
        embed_page = self._download_webpage(url, video_id)
        movie_url = self._search_regex(
            r'href="(https?://xhamster\.com/movies/%s/[^"]+\.html[^"]*)"' % video_id,
            embed_page, 'xhamster url')
        return self.url_result(movie_url, 'XHamster')
--- a/youtube_dl/extractor/xvideos.py
+++ b/youtube_dl/extractor/xvideos.py
@ -5,10 +5,12 @@ import re
 from .common import InfoExtractor
 from ..compat import (
    compat_urllib_parse,
    compat_urllib_request,
 )
 from ..utils import (
    clean_html,
    ExtractorError,
    determine_ext,
 )
@ -25,6 +27,8 @@ class XVideosIE(InfoExtractor):
        }
    }
    _ANDROID_USER_AGENT = 'Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19'
    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
@ -40,9 +44,30 @@ class XVideosIE(InfoExtractor):
        video_thumbnail = self._search_regex(
            r'url_bigthumb=(.+?)&amp', webpage, 'thumbnail', fatal=False)
        formats = [{
            'url': video_url,
        }]
        android_req = compat_urllib_request.Request(url)
        android_req.add_header('User-Agent', self._ANDROID_USER_AGENT)
        android_webpage = self._download_webpage(android_req, video_id, fatal=False)
        if android_webpage is not None:
            player_params_str = self._search_regex(
                'mobileReplacePlayerDivTwoQual\(([^)]+)\)',
                android_webpage, 'player parameters', default='')
            player_params = list(map(lambda s: s.strip(' \''), player_params_str.split(',')))
            if player_params:
                formats.extend([{
                    'url': param,
                    'preference': -10,
                } for param in player_params if determine_ext(param) == 'mp4'])
        self._sort_formats(formats)
        return {
            'id': video_id,
            'url': video_url,
            'formats': formats,
            'title': video_title,
            'ext': 'flv',
            'thumbnail': video_thumbnail,
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -234,6 +234,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        '44': {'ext': 'webm', 'width': 854, 'height': 480},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080},
        '59': {'ext': 'mp4', 'width': 854, 'height': 480},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480},
        # 3d videos
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@ -21,6 +21,7 @@ from ..utils import (
    shell_quote,
    subtitles_filename,
    dfxp2srt,
    ISO639Utils,
 )
@ -307,199 +308,6 @@ class FFmpegVideoConvertorPP(FFmpegPostProcessor):
 class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }
    @classmethod
    def _conver_lang_code(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T

        Only the primary subtag of *code* is considered, so region or script
        suffixes ('en-US', 'pt_BR') are ignored.  A primary subtag that is
        already one of the three-letter codes in ``_lang_map`` is returned
        unchanged.  Returns None for empty input and for codes that are not
        in ``_lang_map``.
        """
        if not code:
            return None
        # Cut at the first '-' or '_' instead of blindly taking two characters:
        # slicing turned unrelated three-letter codes into wrong matches
        # (e.g. 'fil' -> 'fi' -> 'fin').
        primary = code.replace('_', '-').split('-', 1)[0]
        if len(primary) == 2:
            return cls._lang_map.get(primary)
        if primary in cls._lang_map.values():
            return primary
        return None
    def run(self, information):
        if information['ext'] not in ['mp4', 'mkv']:
            self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 or mkv files')
@ -525,7 +333,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
            opts += ['-c:s', 'mov_text']
        for (i, lang) in enumerate(sub_langs):
            opts.extend(['-map', '%d:0' % (i + 1)])
            lang_code = self._conver_lang_code(lang)
            lang_code = ISO639Utils.short2long(lang)
            if lang_code is not None:
                opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -1841,7 +1841,10 @@ def srt_subtitles_timecode(seconds):
 def dfxp2srt(dfxp_data):
    _x = functools.partial(xpath_with_ns, ns_map={'ttml': 'http://www.w3.org/ns/ttml'})
    _x = functools.partial(xpath_with_ns, ns_map={
        'ttml': 'http://www.w3.org/ns/ttml',
        'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
    })
    def parse_node(node):
        str_or_empty = functools.partial(str_or_none, default='')
@ -1849,9 +1852,9 @@ def dfxp2srt(dfxp_data):
        out = str_or_empty(node.text)
        for child in node:
            if child.tag in (_x('ttml:br'), 'br'):
            if child.tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
                out += '\n' + str_or_empty(child.tail)
            elif child.tag in (_x('ttml:span'), 'span'):
            elif child.tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'):
                out += str_or_empty(parse_node(child))
            else:
                out += str_or_empty(xml.etree.ElementTree.tostring(child))
@ -1860,7 +1863,7 @@ def dfxp2srt(dfxp_data):
    dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')
    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')
@ -1879,6 +1882,208 @@ def dfxp2srt(dfxp_data):
    return ''.join(out)
 class ISO639Utils(object):
    """Conversions between ISO 639-1 (two-letter) and ISO 639-2/T
    (three-letter) language codes, backed by the ``_lang_map`` table."""

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T

        Only the primary subtag of *code* is considered, so region or script
        suffixes ('en-US', 'pt_BR') are ignored.  A primary subtag that is
        already one of the three-letter codes in the table is returned
        unchanged.  Returns None for empty input and for codes that are not
        in the table.
        """
        if not code:
            return None
        # Cut at the first '-' or '_' instead of blindly taking two characters:
        # slicing turned unrelated three-letter codes into wrong matches
        # (e.g. 'fil' -> 'fi' -> 'fin').
        primary = code.replace('_', '-').split('-', 1)[0]
        if len(primary) == 2:
            return cls._lang_map.get(primary)
        if primary in cls._lang_map.values():
            return primary
        return None

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1

        Returns None when *code* is not a three-letter code in the table.
        """
        for short_name, long_name in cls._lang_map.items():
            if long_name == code:
                return short_name
        return None
 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    def __init__(self, proxies=None):
        # Set default handlers