[viddler] Add basic support (Fixes #1520)

11 years ago · 41e8bca4d0
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -117,6 +117,7 @@ from .veehd import VeeHDIE
 from .veoh import VeohIE
 from .vevo import VevoIE
 from .vice import ViceIE
 from .viddler import ViddlerIE
 from .videofyme import VideofyMeIE
 from .vimeo import VimeoIE, VimeoChannelIE
 from .vine import VineIE
--- a/youtube_dl/extractor/viddler.py
+++ b/youtube_dl/extractor/viddler.py
@ -0,0 +1,64 @@
 import json
 import re
 from .common import InfoExtractor
 from ..utils import (
    determine_ext,
 )
 class ViddlerIE(InfoExtractor):
    _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P<id>[0-9]+)'
    _TEST = {
        u"url": u"http://www.viddler.com/v/43903784",
        u'file': u'43903784.mp4',
        u'md5': u'fbbaedf7813e514eb7ca30410f439ac9',
        u'info_dict': {
            u"title": u"Video Made Easy",
            u"uploader": u"viddler",
            u"duration": 100.89,
        }
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        embed_url = mobj.group('domain') + u'/embed/' + video_id
        webpage = self._download_webpage(embed_url, video_id)
        video_sources_code = self._search_regex(
            r"(?ms)sources\s*:\s*(\{.*?\})", webpage, u'video URLs')
        video_sources = json.loads(video_sources_code.replace("'", '"'))
        formats = [{
            'url': video_url,
            'format': format_id,
        } for video_url, format_id in video_sources.items()]
        title = self._html_search_regex(
            r"title\s*:\s*'([^']*)'", webpage, u'title')
        uploader = self._html_search_regex(
            r"authorName\s*:\s*'([^']*)'", webpage, u'uploader', fatal=False)
        duration_s = self._html_search_regex(
            r"duration\s*:\s*([0-9.]*)", webpage, u'duration', fatal=False)
        duration = float(duration_s) if duration_s else None
        thumbnail = self._html_search_regex(
            r"thumbnail\s*:\s*'([^']*)'",
            webpage, u'thumbnail', fatal=False)
        info = {
            '_type': 'video',
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'duration': duration,
            'formats': formats,
        }
        # TODO: Remove when #980 has been merged
        info['formats'][-1]['ext'] = determine_ext(info['formats'][-1]['url'])
        info.update(info['formats'][-1])
        return info
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -175,7 +175,7 @@ def compat_ord(c):
 compiled_regex_type = type(re.compile(''))
 std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',