Browse Source

[firedrive] Add new extractor. Addresses #3095

totalwebcasting
Naglis Jonaitis 10 years ago
parent
commit
678f58de4b
2 changed files with 82 additions and 0 deletions
  1. +1
    -0
      youtube_dl/extractor/__init__.py
  2. +81
    -0
      youtube_dl/extractor/firedrive.py

+ 1
- 0
youtube_dl/extractor/__init__.py View File

@ -83,6 +83,7 @@ from .extremetube import ExtremeTubeIE
from .facebook import FacebookIE
from .faz import FazIE
from .fc2 import FC2IE
from .firedrive import FiredriveIE
from .firstpost import FirstpostIE
from .firsttv import FirstTVIE
from .fivemin import FiveMinIE


+ 81
- 0
youtube_dl/extractor/firedrive.py View File

@ -0,0 +1,81 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
compat_urllib_parse,
compat_urllib_request,
determine_ext,
)
class FiredriveIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?firedrive\.com/' + \
'(?:file|embed)/(?P<id>[0-9a-zA-Z]+)'
_FILE_DELETED_REGEX = r'<div class="removed_file_image">'
_TESTS = [{
'url': 'https://www.firedrive.com/file/FEB892FA160EBD01',
'md5': 'd5d4252f80ebeab4dc2d5ceaed1b7970',
'info_dict': {
'id': 'FEB892FA160EBD01',
'ext': 'flv',
'title': 'bbb_theora_486kbit.flv',
'thumbnail': 're:http://.*\.jpg',
},
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
url = 'http://firedrive.com/file/%s' % video_id
webpage = self._download_webpage(url, video_id)
if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
raise ExtractorError(u'Video %s does not exist' % video_id,
expected=True)
fields = dict(re.findall(r'''(?x)<input\s+
type="hidden"\s+
name="([^"]+)"\s+
(?:id="[^"]+"\s+)?
value="([^"]*)"
''', webpage))
post = compat_urllib_parse.urlencode(fields)
req = compat_urllib_request.Request(url, post)
req.add_header('Content-type', 'application/x-www-form-urlencoded')
# Apparently, this header is required for confirmation to work.
req.add_header('Host', 'www.firedrive.com')
webpage = self._download_webpage(req, video_id,
'Downloading video page')
title = self._search_regex(r'class="external_title_left">(.+)</div>',
webpage, 'title')
thumbnail = self._search_regex(r'image:\s?"(//[^\"]+)', webpage,
'thumbnail', fatal=False, default="")
url = self._search_regex(r'file:\s?\'(http[^\']+)\',',
webpage, 'file url')
ext = self._search_regex(r'type:\s?\'([^\']+)\',',
webpage, 'extension', fatal=False)
formats = [{
'format_id': 'sd',
'url': url,
'ext': ext or determine_ext(url),
'quality': 1,
}]
return {
'id': video_id,
'title': title,
'thumbnail': "http:" + thumbnail,
'formats': formats,
}

Loading…
Cancel
Save