Browse Source

Merge branch 'googledrive' of github.com:remitamine/youtube-dl into remitamine-googledrive

totalwebcasting
remitamine 9 years ago
parent
commit
a8ae232fa9
3 changed files with 95 additions and 0 deletions
  1. +1
    -0
      youtube_dl/extractor/__init__.py
  2. +6
    -0
      youtube_dl/extractor/generic.py
  3. +88
    -0
      youtube_dl/extractor/googledrive.py

+ 1
- 0
youtube_dl/extractor/__init__.py View File

@ -231,6 +231,7 @@ from .globo import (
from .godtube import GodTubeIE from .godtube import GodTubeIE
from .goldenmoustache import GoldenMoustacheIE from .goldenmoustache import GoldenMoustacheIE
from .golem import GolemIE from .golem import GolemIE
from .googledrive import GoogleDriveIE
from .googleplus import GooglePlusIE from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE from .googlesearch import GoogleSearchIE
from .goshgay import GoshgayIE from .goshgay import GoshgayIE


+ 6
- 0
youtube_dl/extractor/generic.py View File

@ -55,6 +55,7 @@ from .snagfilms import SnagFilmsEmbedIE
from .screenwavemedia import ScreenwaveMediaIE from .screenwavemedia import ScreenwaveMediaIE
from .mtv import MTVServicesEmbeddedIE from .mtv import MTVServicesEmbeddedIE
from .pladform import PladformIE from .pladform import PladformIE
from .googledrive import GoogleDriveIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -1769,6 +1770,11 @@ class GenericIE(InfoExtractor):
if nbc_sports_url: if nbc_sports_url:
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer') return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
# Look for Google Drive embeds
google_drive_url = GoogleDriveIE._extract_url(webpage)
if google_drive_url:
return self.url_result(google_drive_url, 'GoogleDrive')
# Look for UDN embeds # Look for UDN embeds
mobj = re.search( mobj = re.search(
r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage) r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)


+ 88
- 0
youtube_dl/extractor/googledrive.py View File

@ -0,0 +1,88 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
)
class GoogleDriveIE(InfoExtractor):
    """Information extractor for videos stored on Google Drive / Google Docs.

    Handles ``docs.google.com`` and ``drive.google.com`` file URLs
    (``/file/d/<id>`` and ``/uc?...id=<id>``) as well as
    ``video.google.com/get_player?...docid=<id>`` URLs, where ``<id>`` is a
    28-character file id.
    """

    _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28})'
    _TEST = {
        'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
        'md5': '881f7700aec4f538571fa1e0eed4a7b6',
        'info_dict': {
            'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
            'ext': 'mp4',
            'title': 'Big Buck Bunny.mp4',
            'duration': 46,
        }
    }
    # Container extension for each known format id found in "fmt_stream_map".
    _FORMATS_EXT = {
        '5': 'flv',
        '6': 'flv',
        '13': '3gp',
        '17': '3gp',
        '18': 'mp4',
        '22': 'mp4',
        '34': 'flv',
        '35': 'flv',
        '36': '3gp',
        '37': 'mp4',
        '38': 'mp4',
        '43': 'webm',
        '44': 'webm',
        '45': 'webm',
        '46': 'webm',
        '59': 'mp4',
    }

    @staticmethod
    def _extract_url(webpage):
        """Find a Google Drive / Google Video player iframe in *webpage*.

        Returns a ``https://drive.google.com/file/d/<id>`` URL for the first
        matching ``<iframe>`` embed, or ``None`` when there is no such embed.
        """
        mobj = re.search(
            r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})',
            webpage)
        if mobj:
            return 'https://drive.google.com/file/d/%s' % mobj.group('id')

    def _real_extract(self, url):
        """Extract title, duration, thumbnail and formats for *url*.

        Raises ExtractorError with the page-provided text when the page
        carries a "reason" entry.
        """
        video_id = self._match_id(url)
        # NOTE(review): the page is decoded with 'unicode_escape', presumably
        # so that escaped characters inside the embedded values (e.g. stream
        # URLs) come out unescaped -- confirm.
        webpage = self._download_webpage(
            'http://docs.google.com/file/d/%s' % video_id, video_id, encoding='unicode_escape')

        reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
        if reason:
            raise ExtractorError(reason)

        title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title')
        duration = int_or_none(self._search_regex(
            r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', default=None))
        fmt_stream_map = self._search_regex(
            r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',')
        fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')

        # Both lists carry the format id, so resolutions are looked up by id
        # instead of relying on the two lists having the same order and
        # length. Each "fmt_list" entry is '<format id>/<width>x<height>/...'.
        resolutions = {}
        for fmt in fmt_list:
            fmt_fields = fmt.split('/')
            if len(fmt_fields) >= 2:
                resolutions[fmt_fields[0]] = fmt_fields[1]

        formats = []
        for fmt_stream in fmt_stream_map:
            # Each entry is '<format id>|<url>'; partition() splits on the
            # first '|' only, so a '|' inside the URL does not break parsing.
            fmt_id, separator, fmt_url = fmt_stream.partition('|')
            if not separator or not fmt_url:
                # Empty or malformed entry: nothing downloadable here.
                continue
            fmt_info = {
                'url': fmt_url,
                'format_id': fmt_id,
            }
            # Unknown format ids no longer abort the whole extraction with a
            # KeyError. NOTE(review): 'ext' is simply left out for them,
            # assuming downstream code can cope with a missing 'ext' -- confirm.
            ext = self._FORMATS_EXT.get(fmt_id)
            if ext:
                fmt_info['ext'] = ext
            resolution = resolutions.get(fmt_id)
            if resolution:
                fmt_info['resolution'] = resolution
                # Only expose numeric dimensions when the value really has
                # the '<width>x<height>' shape.
                dimensions = re.match(r'(?P<width>\d+)[xX](?P<height>\d+)$', resolution)
                if dimensions:
                    fmt_info['width'] = int_or_none(dimensions.group('width'))
                    fmt_info['height'] = int_or_none(dimensions.group('height'))
            formats.append(fmt_info)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage),
            'duration': duration,
            'formats': formats,
        }

Loading…
Cancel
Save