
Merge remote-tracking branch 'upstream/master'

Andreas Schmitz, 11 years ago · commit f4371f4784
11 changed files with 219 additions and 64 deletions
  1. test/test_all_urls.py                  +2   -0
  2. test/test_youtube_lists.py             +7   -0
  3. youtube_dl/__init__.py                 +1   -0
  4. youtube_dl/extractor/__init__.py       +2   -0
  5. youtube_dl/extractor/chilloutzone.py  +31  -52
  6. youtube_dl/extractor/elpais.py         +1   -1
  7. youtube_dl/extractor/mooshare.py       +2   -2
  8. youtube_dl/extractor/ndr.py           +89   -0
  9. youtube_dl/extractor/nfb.py           +76   -0
 10. youtube_dl/extractor/youtube.py        +7   -8
 11. youtube_dl/version.py                  +1   -1

test/test_all_urls.py  (+2, -0)

@@ -37,6 +37,8 @@ class TestAllURLsMatching(unittest.TestCase):
         assertPlaylist(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
         assertPlaylist(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
         self.assertFalse('youtube:playlist' in self.matching_ies(u'PLtS2H6bU1M'))
+        # Top tracks
+        assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101')
 
     def test_youtube_matching(self):
         self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))


test/test_youtube_lists.py  (+7, -0)

@@ -117,6 +117,13 @@ class TestYoutubeLists(unittest.TestCase):
         original_video = entries[0]
         self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
 
+    def test_youtube_toptracks(self):
+        dl = FakeYDL()
+        ie = YoutubePlaylistIE(dl)
+        result = ie.extract('https://www.youtube.com/playlist?list=MCUS')
+        entries = result['entries']
+        self.assertEqual(len(entries), 100)
+
     def test_youtube_toplist(self):
         dl = FakeYDL()
         ie = YoutubeTopListIE(dl)


youtube_dl/__init__.py  (+1, -0)

@@ -41,6 +41,7 @@ __authors__ = (
     'Chris Gahan',
     'Saimadhav Heblikar',
     'Mike Col',
+    'Andreas Schmitz',
 )
 
 __license__ = 'Public Domain'


youtube_dl/extractor/__init__.py  (+2, -0)

@@ -143,8 +143,10 @@ from .myvideo import MyVideoIE
 from .naver import NaverIE
 from .nba import NBAIE
 from .nbc import NBCNewsIE
+from .ndr import NDRIE
 from .ndtv import NDTVIE
 from .newgrounds import NewgroundsIE
+from .nfb import NFBIE
 from .nhl import NHLIE, NHLVideocenterIE
 from .niconico import NiconicoIE
 from .ninegag import NineGagIE


youtube_dl/extractor/chilloutzone.py  (+31, -52)

@@ -1,14 +1,18 @@
+from __future__ import unicode_literals
+
 import re
 import base64
-import urllib
 import json
 
 from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    ExtractorError
+)
 
-video_container = ('.mp4', '.mkv', '.flv')
 
 class ChilloutzoneIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)?(?:www\.)?chilloutzone\.net/video/(?P<id>[\w|-]+).html'
+    _VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w|-]+)\.html'
     _TEST = {
         'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html',
         'md5': 'a76f3457e813ea0037e5244f509e66d1',
@@ -16,6 +20,7 @@ class ChilloutzoneIE(InfoExtractor):
             'id': 'enemene-meck-alle-katzen-weg',
             'ext': 'mp4',
             'title': 'Enemene Meck - Alle Katzen weg',
+            'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?',
         },
     }
 
@@ -23,71 +28,45 @@ class ChilloutzoneIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
 
-        webpage_url = 'http://www.chilloutzone.net/video/' + video_id + '.html'
-
-        # Log that we are starting to download the page
-        self.report_download_webpage(webpage_url)
-        webpage = self._download_webpage(webpage_url, video_id)
-
-        # Log that we are starting to parse the page
-        self.report_extraction(video_id)
-
-        # Find base64 decoded file info
-        base64_video_info = self._html_search_regex(r'var cozVidData = "(.+?)";', webpage, u'video Data')
-        # decode string and find video file
+        webpage = self._download_webpage(url, video_id)
+
+        base64_video_info = self._html_search_regex(
+            r'var cozVidData = "(.+?)";', webpage, 'video data')
         decoded_video_info = base64.b64decode(base64_video_info).decode("utf-8")
         video_info_dict = json.loads(decoded_video_info)
 
         # get video information from dict
-        media_url = video_info_dict['mediaUrl']
-        description = video_info_dict['description']
+        video_url = video_info_dict['mediaUrl']
+        description = clean_html(video_info_dict.get('description'))
         title = video_info_dict['title']
         native_platform = video_info_dict['nativePlatform']
         native_video_id = video_info_dict['nativeVideoId']
         source_priority = video_info_dict['sourcePriority']
 
-        # Start video extraction
-        video_url = ''
-
         # If nativePlatform is None a fallback mechanism is used (i.e. youtube embed)
-        if native_platform == None:
-            # Look for other video urls
-            video_url = self._html_search_regex(r'<iframe.* src="(.+?)".*', webpage, u'fallback Video URL')
-            if 'youtube' in video_url:
-                self.to_screen(u'Youtube video detected:')
-                return self.url_result(video_url, ie='Youtube')
-
-        # For debugging purposes
-        #print video_info_dict
-        #print native_platform
-        #print native_video_id
-        #print source_priority
-        #print media_url
+        if native_platform is None:
+            youtube_url = self._html_search_regex(
+                r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
+                webpage, 'fallback video URL', default=None)
+            if youtube_url is not None:
+                return self.url_result(youtube_url, ie='Youtube')
 
         # Non Fallback: Decide to use native source (e.g. youtube or vimeo) or
         # the own CDN
         if source_priority == 'native':
             if native_platform == 'youtube':
-                self.to_screen(u'Youtube video detected:')
-                video_url = 'https://www.youtube.com/watch?v=' + native_video_id
-                return self.url_result(video_url, ie='Youtube')
+                return self.url_result(video_id, ie='Youtube')
 
             if native_platform == 'vimeo':
-                self.to_screen(u'Vimeo video detected:')
-                video_url = 'http://vimeo.com/' + native_video_id
-                return self.url_result(video_url, ie='Vimeo')
-
-        # No redirect, use coz media url
-        video_url = media_url
-
-        if video_url.endswith('.mp4') == False:
-            self.report_warning(u'Url does not contain a video container')
-            return []
+                return self.url_result(
+                    'http://vimeo.com/' + native_video_id, ie='Vimeo')
+
+        if not video_url:
+            raise ExtractorError('No video found')
 
-        return [{
+        return {
             'id': video_id,
             'url': video_url,
             'ext': 'mp4',
             'title': title,
             'description': description,
-        }]
+        }
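
Note: the rewritten extractor's core decode step is small enough to show on its own. The page embeds a base64-encoded JSON blob in `var cozVidData = "..."`, which is decoded to UTF-8 and parsed into a dict. The sketch below is standalone and hypothetical: the payload values and URL are invented, only the key names (mediaUrl, title, description, nativePlatform, nativeVideoId, sourcePriority) come from the code above.

# Standalone sketch of the decode path used above (hypothetical payload;
# only the key names are taken from the extractor).
import base64
import json

sample_coz_vid_data = base64.b64encode(json.dumps({
    'mediaUrl': 'http://media.example.com/clip.mp4',
    'title': 'Example title',
    'description': '<p>Example description</p>',
    'nativePlatform': None,
    'nativeVideoId': None,
    'sourcePriority': 'coz',
}).encode('utf-8'))

# Mirrors: base64.b64decode(base64_video_info).decode("utf-8") followed by json.loads(...)
video_info_dict = json.loads(base64.b64decode(sample_coz_vid_data).decode('utf-8'))
print(video_info_dict['mediaUrl'], video_info_dict['sourcePriority'])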

youtube_dl/extractor/elpais.py  (+1, -1)

@@ -9,7 +9,7 @@ from ..utils import unified_strdate
 
 class ElPaisIE(InfoExtractor):
     _VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
-    IE_DESCR = 'El País'
+    IE_DESC = 'El País'
 
     _TEST = {
         'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',


youtube_dl/extractor/mooshare.py  (+2, -2)

@@ -61,7 +61,7 @@ class MooshareIE(InfoExtractor):
         }
 
         request = compat_urllib_request.Request(
-            'http://mooshare.biz/8dqtk4bjbp8g', compat_urllib_parse.urlencode(download_form))
+            'http://mooshare.biz/%s' % video_id, compat_urllib_parse.urlencode(download_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
 
         self.to_screen('%s: Waiting for timeout' % video_id)
@@ -111,4 +111,4 @@ class MooshareIE(InfoExtractor):
             'thumbnail': thumbnail,
             'duration': duration,
             'formats': formats,
-        }
+        }

youtube_dl/extractor/ndr.py  (+89, -0)

@@ -0,0 +1,89 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class NDRIE(InfoExtractor):
+    IE_NAME = 'ndr'
+    IE_DESC = 'NDR.de - Mediathek'
+    _VALID_URL = r'https?://www\.ndr\.de/.+?(?P<id>\d+)\.html'
+
+    _TESTS = [
+        # video
+        {
+            'url': 'http://www.ndr.de/fernsehen/sendungen/hallo_niedersachsen/media/hallonds19925.html',
+            'md5': '20eba151ff165f386643dad9c1da08f7',
+            'info_dict': {
+                'id': '19925',
+                'ext': 'mp4',
+                'title': 'Hallo Niedersachsen ',
+                'description': 'Bei Hallo Niedersachsen um 19:30 Uhr erfahren Sie alles, was am Tag in Niedersachsen los war.',
+                'duration': 1722,
+            },
+        },
+        # audio
+        {
+            'url': 'http://www.ndr.de/903/audio191719.html',
+            'md5': '41ed601768534dd18a9ae34d84798129',
+            'info_dict': {
+                'id': '191719',
+                'ext': 'mp3',
+                'title': '"Es war schockierend"',
+                'description': 'md5:ed7ff8364793545021a6355b97e95f10',
+                'duration': 112,
+            }
+        }
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        page = self._download_webpage(url, video_id, 'Downloading page')
+
+        title = self._og_search_title(page)
+        description = self._og_search_description(page)
+
+        mobj = re.search(
+            r'<div class="duration"><span class="min">(?P<minutes>\d+)</span>:<span class="sec">(?P<seconds>\d+)</span></div>',
+            page)
+        duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
+
+        formats = []
+
+        mp3_url = re.search(r'''{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page)
+        if mp3_url:
+            formats.append({
+                'url': mp3_url.group('audio'),
+                'format_id': 'mp3',
+            })
+
+        thumbnail = None
+
+        video_url = re.search(r'''3: {src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page)
+        if video_url:
+            thumbnail = self._html_search_regex(r'(?m)title: "NDR PLAYER",\s*poster: "([^"]+)",',
+                page, 'thumbnail', fatal=False)
+            if thumbnail:
+                thumbnail = 'http://www.ndr.de' + thumbnail
+            for format_id in ['lo', 'hi', 'hq']:
+                formats.append({
+                    'url': '%s.%s.mp4' % (video_url.group('video'), format_id),
+                    'format_id': format_id,
+                })
+
+        if not formats:
+            raise ExtractorError('No media links available for %s' % video_id)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'formats': formats,
+        }
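
A note on the duration handling in NDRIE: the value is assembled from separate minute and second spans in the page markup. The snippet below exercises the same regex against a hypothetical markup fragment, chosen so the result equals the 1722 seconds expected by the first test above.

import re

# Hypothetical fragment shaped like the markup the regex above targets.
page = '<div class="duration"><span class="min">28</span>:<span class="sec">42</span></div>'

mobj = re.search(
    r'<div class="duration"><span class="min">(?P<minutes>\d+)</span>:<span class="sec">(?P<seconds>\d+)</span></div>',
    page)
duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
print(duration)  # 28 * 60 + 42 = 1722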

youtube_dl/extractor/nfb.py  (+76, -0)

@@ -0,0 +1,76 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_request,
+    compat_urllib_parse,
+)
+
+
+class NFBIE(InfoExtractor):
+    IE_NAME = 'nfb'
+    IE_DESC = 'National Film Board of Canada'
+    _VALID_URL = r'https?://(?:www\.)?(nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)'
+
+    _TEST = {
+        'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny',
+        'info_dict': {
+            'id': 'qallunaat_why_white_people_are_funny',
+            'ext': 'mp4',
+            'title': 'Qallunaat! Why White People Are Funny ',
+            'description': 'md5:836d8aff55e087d04d9f6df554d4e038',
+            'duration': 3128,
+            'uploader': 'Mark Sandiford',
+            'uploader_id': 'mark-sandiford',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        page = self._download_webpage('https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page')
+
+        uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"',
+            page, 'director id', fatal=False)
+        uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>',
+            page, 'director name', fatal=False)
+
+        request = compat_urllib_request.Request('https://www.nfb.ca/film/%s/player_config' % video_id,
+            compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
+        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+        request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')
+
+        config = self._download_xml(request, video_id, 'Downloading player config XML')
+
+        thumbnail = config.find("./player/stream/media[@type='posterImage']/assets/asset[@quality='high']/default/url").text
+        video = config.find("./player/stream/media[@type='video']")
+        duration = int(video.get('duration'))
+        title = video.find('title').text
+        description = video.find('description').text
+
+        # It seems assets always go from lower to better quality, so no need to sort
+        formats = [{
+            'url': x.find('default/streamerURI').text + '/',
+            'play_path': x.find('default/url').text,
+            'rtmp_live': False,
+            'ext': 'mp4',
+            'format_id': x.get('quality'),
+        } for x in video.findall('assets/asset')]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'formats': formats,
+        }
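
To illustrate how NFBIE reads the player_config response, here is a sketch against a made-up XML document that merely follows the element paths queried above (player/stream/media, assets/asset, default/streamerURI, default/url). The real NFB response is richer and its exact schema is not shown in this diff; the structure below only covers what the find()/findall() calls need.

import xml.etree.ElementTree as ET

# Fabricated config document shaped after the paths used by the extractor.
config = ET.fromstring('''
<response>
  <player>
    <stream>
      <media type="posterImage">
        <assets>
          <asset quality="high"><default><url>http://example.com/poster.jpg</url></default></asset>
        </assets>
      </media>
      <media type="video" duration="3128">
        <title>Example film</title>
        <description>Example description</description>
        <assets>
          <asset quality="sd">
            <default>
              <streamerURI>rtmp://example.com/app</streamerURI>
              <url>mp4:example_sd</url>
            </default>
          </asset>
        </assets>
      </media>
    </stream>
  </player>
</response>
''')

video = config.find("./player/stream/media[@type='video']")
print(int(video.get('duration')))   # 3128
print(video.find('title').text)     # Example film
formats = [{
    'url': x.find('default/streamerURI').text + '/',
    'play_path': x.find('default/url').text,
    'format_id': x.get('quality'),
} for x in video.findall('assets/asset')]
print(formats)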

youtube_dl/extractor/youtube.py  (+7, -8)

@@ -1422,7 +1422,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
 class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
     IE_DESC = u'YouTube.com playlists'
-    _VALID_URL = r"""(?:
+    _VALID_URL = r"""(?x)(?:
                         (?:https?://)?
                         (?:\w+\.)?
                         youtube\.com/
@@ -1431,7 +1431,11 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                           \? (?:.*?&)*? (?:p|a|list)=
                           |  p/
                        )
-                        ((?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,})
+                        (
+                            (?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
+                            # Top tracks, they can also include dots
+                            |(?:MC)[\w\.]*
+                        )
                         .*
                         |
                         ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
@@ -1441,11 +1445,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
     _VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
     IE_NAME = u'youtube:playlist'
 
-    @classmethod
-    def suitable(cls, url):
-        """Receives a URL and returns True if suitable for this IE."""
-        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
-
     def _real_initialize(self):
         self._login()
 
@@ -1469,7 +1468,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
 
     def _real_extract(self, url):
         # Extract playlist id
-        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
+        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
             raise ExtractorError(u'Invalid URL: %s' % url)
         playlist_id = mobj.group(1) or mobj.group(2)
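
The practical effect of the _VALID_URL change is easiest to see on the list-id alternative in isolation. The pattern below is a cut-down excerpt for illustration, not the full URL regex; it just shows that top-tracks ids (MC..., possibly containing dots) now match alongside the usual PL/EC/UU/FL/RD ids.

import re

# Cut-down excerpt of the new list-id group (the real _VALID_URL wraps this
# in a larger verbose regex that also matches full youtube.com URLs).
playlist_id_re = re.compile(r'''(?x)^(
        (?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
        # Top tracks, they can also include dots
        |(?:MC)[\w\.]*
    )$''')

for candidate in ('PL4023E734DA416012', 'MCUS', 'MCUS.20142101'):
    print(candidate, bool(playlist_id_re.match(candidate)))
# All three match; against the old group alone, 'MCUS' (shorter than 10
# characters) and 'MCUS.20142101' (contains dots) would not have been accepted.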


youtube_dl/version.py  (+1, -1)

@@ -1,2 +1,2 @@
-__version__ = '2014.02.06.1'
+__version__ = '2014.02.06.3'
