Browse Source

[googledrive] Modernize

totalwebcasting
remitamine 9 years ago
parent
commit
5b251628e9
3 changed files with 53 additions and 100 deletions
  1. +1
    -4
      youtube_dl/extractor/__init__.py
  2. +2
    -2
      youtube_dl/extractor/generic.py
  3. +50
    -94
      youtube_dl/extractor/googledrive.py

+ 1
- 4
youtube_dl/extractor/__init__.py View File

@ -209,10 +209,7 @@ from .globo import GloboIE
from .godtube import GodTubeIE from .godtube import GodTubeIE
from .goldenmoustache import GoldenMoustacheIE from .goldenmoustache import GoldenMoustacheIE
from .golem import GolemIE from .golem import GolemIE
from .googledrive import (
GoogleDriveEmbedIE,
GoogleDriveIE,
)
from .googledrive import GoogleDriveIE
from .googleplus import GooglePlusIE from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE from .googlesearch import GoogleSearchIE
from .gorillavid import GorillaVidIE from .gorillavid import GorillaVidIE


+ 2
- 2
youtube_dl/extractor/generic.py View File

@ -48,7 +48,7 @@ from .vimeo import VimeoIE
from .dailymotion import DailymotionCloudIE from .dailymotion import DailymotionCloudIE
from .onionstudios import OnionStudiosIE from .onionstudios import OnionStudiosIE
from .snagfilms import SnagFilmsEmbedIE from .snagfilms import SnagFilmsEmbedIE
from .googledrive import GoogleDriveEmbedIE
from .googledrive import GoogleDriveIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -1601,7 +1601,7 @@ class GenericIE(InfoExtractor):
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer') return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
# Look for Google Drive embeds # Look for Google Drive embeds
google_drive_url = GoogleDriveEmbedIE._extract_url(webpage)
google_drive_url = GoogleDriveIE._extract_url(webpage)
if google_drive_url: if google_drive_url:
return self.url_result(google_drive_url, 'GoogleDrive') return self.url_result(google_drive_url, 'GoogleDrive')


+ 50
- 94
youtube_dl/extractor/googledrive.py View File

@ -1,132 +1,88 @@
from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
RegexNotFoundError,
ExtractorError, ExtractorError,
int_or_none,
) )
class GoogleDriveEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})'
class GoogleDriveIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28})'
_TEST = { _TEST = {
'url': 'https://docs.google.com/file/d/0B8KB9DRosYGKMXNoeWxqa3JYclE/preview',
'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
'md5': '881f7700aec4f538571fa1e0eed4a7b6',
'info_dict': { 'info_dict': {
'id': '0B8KB9DRosYGKMXNoeWxqa3JYclE',
'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Jimmy Fallon Sings Since You\'ve Been Gone.wmv',
'title': 'Big Buck Bunny.mp4',
'duration': 46,
} }
} }
_FORMATS_EXT = {
'5': 'flv',
'6': 'flv',
'13': '3gp',
'17': '3gp',
'18': 'mp4',
'22': 'mp4',
'34': 'flv',
'35': 'flv',
'36': '3gp',
'37': 'mp4',
'38': 'mp4',
'43': 'webm',
'44': 'webm',
'45': 'webm',
'46': 'webm',
'59': 'mp4',
}
@staticmethod @staticmethod
def _extract_url(webpage): def _extract_url(webpage):
mobj = re.search( mobj = re.search(
r'<iframe src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})',
r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})',
webpage) webpage)
if mobj: if mobj:
return 'https://drive.google.com/file/d/%s' % mobj.group('id') return 'https://drive.google.com/file/d/%s' % mobj.group('id')
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
return {
'_type': 'url',
'ie_key': 'GoogleDrive',
'url': 'https://drive.google.com/file/d/%s' % video_id
}
webpage = self._download_webpage(
'http://docs.google.com/file/d/%s' % video_id, video_id, encoding='unicode_escape')
class GoogleDriveIE(InfoExtractor):
_VALID_URL = r'https?://(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)(?P<id>[a-zA-Z0-9_-]{28})'
_TEST = {
'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
'info_dict': {
'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
'ext': 'mp4',
'title': 'Big Buck Bunny.mp4',
}
}
_formats = {
'5': {'ext': 'flv'},
'6': {'ext': 'flv'},
'13': {'ext': '3gp'},
'17': {'ext': '3gp'},
'18': {'ext': 'mp4'},
'22': {'ext': 'mp4'},
'34': {'ext': 'flv'},
'35': {'ext': 'flv'},
'36': {'ext': '3gp'},
'37': {'ext': 'mp4'},
'38': {'ext': 'mp4'},
'43': {'ext': 'webm'},
'44': {'ext': 'webm'},
'45': {'ext': 'webm'},
'46': {'ext': 'webm'},
'59': {'ext': 'mp4'}
}
reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
if reason:
raise ExtractorError(reason)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'http://docs.google.com/file/d/' + video_id, video_id, encoding='unicode_escape'
)
try:
title = self._html_search_regex(
r'"title"\s*,\s*"([^"]+)',
webpage,
'title'
)
fmt_stream_map = self._html_search_regex(
r'"fmt_stream_map"\s*,\s*"([^"]+)',
webpage,
'fmt_stream_map'
)
fmt_list = self._html_search_regex(
r'"fmt_list"\s*,\s*"([^"]+)',
webpage,
'fmt_list'
)
# timestamp = self._html_search_regex(
# r'"timestamp"\s*,\s*"([^"]+)',
# webpage,
# 'timestamp'
# )
length_seconds = self._html_search_regex(
r'"length_seconds"\s*,\s*"([^"]+)',
webpage,
'length_seconds'
)
except RegexNotFoundError:
try:
reason = self._html_search_regex(
r'"reason","([^"]+)',
webpage,
'reason'
)
raise ExtractorError(reason)
return
except RegexNotFoundError:
raise ExtractorError('not a video')
return
title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title')
duration = int_or_none(self._search_regex(
r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', default=None))
fmt_stream_map = self._search_regex(
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',')
fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')
fmt_stream_map = fmt_stream_map.split(',')
fmt_list = fmt_list.split(',')
formats = [] formats = []
for i in range(len(fmt_stream_map)):
fmt_id, fmt_url = fmt_stream_map[i].split('|')
resolution = fmt_list[i].split('/')[1]
for fmt, fmt_stream in zip(fmt_list, fmt_stream_map):
fmt_id, fmt_url = fmt_stream.split('|')
resolution = fmt.split('/')[1]
width, height = resolution.split('x') width, height = resolution.split('x')
formats.append({ formats.append({
'url': fmt_url, 'url': fmt_url,
'format_id': fmt_id, 'format_id': fmt_id,
'resolution': resolution, 'resolution': resolution,
'width': int(width),
'height': int(height),
'ext': self._formats[fmt_id]['ext']
'width': int_or_none(width),
'height': int_or_none(height),
'ext': self._FORMATS_EXT[fmt_id],
}) })
self._sort_formats(formats) self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
# 'timestamp': int(timestamp),
'duration': int(length_seconds),
'formats': formats
'thumbnail': self._og_search_thumbnail(webpage),
'duration': duration,
'formats': formats,
} }

Loading…
Cancel
Save