[firedrive] Add new extractor. Addresses #3095

10 years ago · 678f58de4b
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -83,6 +83,7 @@ from .extremetube import ExtremeTubeIE
 from .facebook import FacebookIE
 from .faz import FazIE
 from .fc2 import FC2IE
 from .firedrive import FiredriveIE
 from .firstpost import FirstpostIE
 from .firsttv import FirstTVIE
 from .fivemin import FiveMinIE
--- a/youtube_dl/extractor/firedrive.py
+++ b/youtube_dl/extractor/firedrive.py
@@ -0,0 +1,81 @@
 # coding: utf-8
 from __future__ import unicode_literals

 import re

 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    compat_urllib_parse,
    compat_urllib_request,
    determine_ext,
 )


 class FiredriveIE(InfoExtractor):
    """Extractor for firedrive.com ``/file/<id>`` and ``/embed/<id>`` URLs."""

    _VALID_URL = r'https?://(?:www\.)?firedrive\.com/' + \
                 '(?:file|embed)/(?P<id>[0-9a-zA-Z]+)'
    # Marker searched for in the landing page to detect a removed file.
    _FILE_DELETED_REGEX = r'<div class="removed_file_image">'

    _TESTS = [{
        'url': 'https://www.firedrive.com/file/FEB892FA160EBD01',
        'md5': 'd5d4252f80ebeab4dc2d5ceaed1b7970',
        'info_dict': {
            'id': 'FEB892FA160EBD01',
            'ext': 'flv',
            'title': 'bbb_theora_486kbit.flv',
            # Raw string: "\." is a regex escape, not a string escape.
            'thumbnail': r're:http://.*\.jpg',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for the Firedrive video behind *url*.

        The landing page is downloaded first; its hidden form fields are
        then POSTed back to the same address and the title, thumbnail,
        media URL and media type are scraped from the response.

        Raises ExtractorError (flagged as expected) when the landing page
        contains the removed-file marker.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # Embed URLs are normalised to the canonical file page.
        page_url = 'http://firedrive.com/file/%s' % video_id

        webpage = self._download_webpage(page_url, video_id)

        if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
            raise ExtractorError('Video %s does not exist' % video_id,
                                 expected=True)

        # Collect every hidden <input> as a name -> value mapping; these
        # are echoed back to the site in the POST below.
        fields = dict(re.findall(r'''(?x)<input\s+
            type="hidden"\s+
            name="([^"]+)"\s+
            (?:id="[^"]+"\s+)?
            value="([^"]*)"
            ''', webpage))

        # urlencode() yields text on Python 3, where urllib only accepts
        # bytes as a request body; its output is percent-encoded ASCII.
        post = compat_urllib_parse.urlencode(fields).encode('ascii')
        req = compat_urllib_request.Request(page_url, post)
        req.add_header('Content-type', 'application/x-www-form-urlencoded')

        # Apparently, this header is required for confirmation to work.
        req.add_header('Host', 'www.firedrive.com')

        webpage = self._download_webpage(req, video_id,
                                         'Downloading video page')

        title = self._search_regex(r'class="external_title_left">(.+)</div>',
                                   webpage, 'title')

        # The page references the thumbnail protocol-relatively ("//...").
        # Only add a scheme when something was found, so a missing
        # thumbnail is reported as None instead of the bogus URL "http:".
        thumbnail = self._search_regex(r'image:\s?"(//[^\"]+)', webpage,
                                       'thumbnail', fatal=False)
        if thumbnail:
            thumbnail = 'http:' + thumbnail
        else:
            thumbnail = None

        video_url = self._search_regex(r'file:\s?\'(http[^\']+)\',',
                                       webpage, 'file url')
        ext = self._search_regex(r'type:\s?\'([^\']+)\',',
                                 webpage, 'extension', fatal=False)

        formats = [{
            'format_id': 'sd',
            'url': video_url,
            # Fall back to the URL's file extension when the page does
            # not state the media type.
            'ext': ext or determine_ext(video_url),
            'quality': 1,
        }]

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }