Merge pull request #3690 from naglis/sharesix

[sharesix] Add new extractor
10 years ago · 9face18d08
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -297,6 +297,7 @@ from .scivee import SciVeeIE
 from .screencast import ScreencastIE
 from .servingsys import ServingSysIE
 from .shared import SharedIE
 from .sharesix import ShareSixIE
 from .sina import SinaIE
 from .slideshare import SlideshareIE
 from .slutload import SlutloadIE
--- a/youtube_dl/extractor/sharesix.py
+++ b/youtube_dl/extractor/sharesix.py
@@ -0,0 +1,91 @@
 # coding: utf-8
 from __future__ import unicode_literals

 import re

 from .common import InfoExtractor
 from ..utils import (
    compat_urllib_parse,
    compat_urllib_request,
    parse_duration,
 )


 class ShareSixIE(InfoExtractor):
    """Extractor for videos hosted on sharesix.com.

    Matches both ``sharesix.com/f/<id>`` and ``sharesix.com/<id>`` URLs
    (with an optional ``www.`` prefix); ``<id>`` is an alphanumeric token.
    """

    _VALID_URL = r'https?://(?:www\.)?sharesix\.com/(?:f/)?(?P<id>[0-9a-zA-Z]+)'
    _TESTS = [
        {
            'url': 'http://sharesix.com/f/OXjQ7Y6',
            'md5': '9e8e95d8823942815a7d7c773110cc93',
            'info_dict': {
                'id': 'OXjQ7Y6',
                'ext': 'mp4',
                'title': 'big_buck_bunny_480p_surround-fix.avi',
                'duration': 596,
                'width': 854,
                'height': 480,
            },
        },
        {
            'url': 'http://sharesix.com/lfrwoxp35zdd',
            'md5': 'dd19f1435b7cec2d7912c64beeee8185',
            'info_dict': {
                'id': 'lfrwoxp35zdd',
                'ext': 'flv',
                'title': 'WhiteBoard___a_Mac_vs_PC_Parody_Cartoon.mp4.flv',
                'duration': 65,
                'width': 1280,
                'height': 720,
            },
        }
    ]

    def _real_extract(self, url):
        """Extract metadata and the media URL for a single sharesix video.

        The page at ``url`` is requested with a POST carrying the form field
        ``method_free=Free``; the returned HTML is then scraped for the
        video link, file name, length and dimensions.

        :param url: a URL matching ``_VALID_URL``.
        :returns: an info dict with the keys ``id``, ``title``, ``duration``
            and ``formats`` (one ``sd`` entry holding ``url``, ``width`` and
            ``height``).  ``width``/``height`` are ``None`` when the page
            has no "Width x Height" entry; ``duration`` is whatever
            ``parse_duration`` yields for the (non-fatal) "Length" match.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        fields = {
            'method_free': 'Free'
        }
        # POST bodies must be bytes: Python 3's urllib raises TypeError when
        # handed a text string.  The urlencoded form is pure ASCII, so the
        # extra encode step is harmless on Python 2 as well.
        post = compat_urllib_parse.urlencode(fields).encode('utf-8')
        req = compat_urllib_request.Request(url, post)
        req.add_header('Content-type', 'application/x-www-form-urlencoded')

        webpage = self._download_webpage(req, video_id,
                                         'Downloading video page')

        # The media link is embedded in inline JavaScript as `var lnk1 = '...'`.
        video_url = self._search_regex(
            r"var\slnk1\s=\s'([^']+)'", webpage, 'video URL')
        title = self._html_search_regex(
            r'(?s)<dt>Filename:</dt>.+?<dd>(.+?)</dd>', webpage, 'title')
        # Length is optional metadata: a missing entry must not abort
        # extraction, hence fatal=False.
        duration = parse_duration(
            self._search_regex(
                r'(?s)<dt>Length:</dt>.+?<dd>(.+?)</dd>',
                webpage,
                'duration',
                fatal=False
            )
        )

        # Verbose mode (?x) ignores the literal whitespace/newline in the
        # pattern; the significant blanks are spelled out as \s.
        m = re.search(
            r'''(?xs)<dt>Width\sx\sHeight</dt>.+?
                     <dd>(?P<width>\d+)\sx\s(?P<height>\d+)</dd>''',
            webpage
        )
        width = height = None
        if m:
            width, height = int(m.group('width')), int(m.group('height'))

        formats = [{
            'format_id': 'sd',
            'url': video_url,
            'width': width,
            'height': height,
        }]

        return {
            'id': video_id,
            'title': title,
            'duration': duration,
            'formats': formats,
        }