Browse Source

add MDRIE

totalwebcasting
mc2avr 11 years ago
parent
commit
df1d7da2af
2 changed files with 79 additions and 0 deletions
  1. +1
    -0
      youtube_dl/extractor/__init__.py
  2. +78
    -0
      youtube_dl/extractor/mdr.py

+ 1
- 0
youtube_dl/extractor/__init__.py View File

@ -86,6 +86,7 @@ from .kickstarter import KickStarterIE
from .keek import KeekIE from .keek import KeekIE
from .liveleak import LiveLeakIE from .liveleak import LiveLeakIE
from .livestream import LivestreamIE, LivestreamOriginalIE from .livestream import LivestreamIE, LivestreamOriginalIE
from .mdr import MDRIE
from .metacafe import MetacafeIE from .metacafe import MetacafeIE
from .metacritic import MetacriticIE from .metacritic import MetacriticIE
from .mit import TechTVMITIE, MITIE from .mit import TechTVMITIE, MITIE


+ 78
- 0
youtube_dl/extractor/mdr.py View File

@ -0,0 +1,78 @@
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
)
class MDRIE(InfoExtractor):
    """Extractor for video and audio pages under mdr.de/mediathek.

    The media page is scraped for its title and for the path of an
    ``...-avCustom.xml`` document. That XML lists one ``<asset>`` per
    available encoding; the best asset offering a progressive HTTP(S)
    download is selected.
    """

    # Captures the site root (``domain``), the media kind (``type``) and the
    # numeric-looking identifier that follows it (``video_id``).
    _VALID_URL = r'^(?P<domain>(?:https?://)?(?:www\.)?mdr\.de)/mediathek/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)_.*'
    # The title is split over the <h2> text and a nested <span>.
    _TITLE = r'<h2>(?P<title1>[^<]+)<span>(?P<title2>[^<]+)</span></h2>'
    # Site-relative path of the XML document describing the media assets.
    _MEDIA_XML = r'(?P<xmlurl>/mediathek/(.+)/(video|audio)([0-9]+)-avCustom.xml)'
    # Both asset patterns rely on ``.`` not matching newlines: the XML is
    # re-flowed in _real_extract so that every <asset> sits on its own line.
    _MEDIA_STREAM_VIDEO = r'<asset>.*<frameWidth>(?P<frameWidth>[0-9]+)</frameWidth>.*<flashMediaServerApplicationURL>(?P<flashMediaServerApplicationURL>[^<]+)</flashMediaServerApplicationURL><flashMediaServerURL>(?P<flashMediaServerURL>[^<]+)</flashMediaServerURL>.*<progressiveDownloadUrl>(?P<progressiveDownloadUrl>[^<]+)</progressiveDownloadUrl></asset>'
    _MEDIA_STREAM_AUDIO = r'<asset>.*<mediaType>(?P<mediaType>[A-Z0-9]+)</mediaType><bitrateAudio>(?P<bitrateAudio>[0-9]+)</bitrateAudio>.*<flashMediaServerApplicationURL>(?P<flashMediaServerApplicationURL>[^<]+)</flashMediaServerApplicationURL><flashMediaServerURL>(?P<flashMediaServerURL>[^<]+)</flashMediaServerURL>.*<progressiveDownloadUrl>(?P<progressiveDownloadUrl>[^<]+)</progressiveDownloadUrl></asset>'

    _TESTS = [{
        u'url': u'http://www.mdr.de/mediathek/themen/nachrichten/video165624_zc-c5c7de76_zs-3795826d.html',
        u'file': u'165624.mp4',
        u'md5': u'95165945756198b8fa2dea10f0b04614',
        u'info_dict': {
            u"title": u"MDR aktuell Eins30 09.12.2013, 22:48 Uhr"
        },
    }, {
        # No leading whitespace here: _VALID_URL is anchored with ``^``.
        u'url': u'http://www.mdr.de/mediathek/radio/mdr1-radio-sachsen/audio718370_zc-67b21197_zs-1b9b2483.html',
        u'file': u'718370.mp4',
        u'md5': u'4a5b1fbb5519fb0d929c384b6ff7cb8b',
        u'info_dict': {
            u"title": u"MDR 1 RADIO SACHSEN 10.12.2013, 05:00 Uhr"
        },
    }]

    def _best_http_stream(self, streams, quality_key, media_type=None):
        """Return the highest-quality asset that has a progressive HTTP(S) URL.

        streams      -- list of group dicts produced by one of the
                        _MEDIA_STREAM_* patterns
        quality_key  -- name of the integer-valued field to maximise
        media_type   -- if given, only assets whose ``mediaType`` equals
                        this value are considered

        Raises ExtractorError if no asset qualifies.
        """
        candidates = []
        for stream in streams:
            if not stream['progressiveDownloadUrl'].startswith(('http://', 'https://')):
                continue
            if media_type is not None and stream.get('mediaType') != media_type:
                continue
            candidates.append(stream)
        # max() raises a bare ValueError on an empty sequence; report the
        # condition as an extraction failure instead.
        if not candidates:
            raise ExtractorError(u'no media found')
        return max(candidates, key=lambda s: int(s[quality_key]))

    def _real_extract(self, url):
        """Extract the media described by *url*.

        Returns a one-element list holding a dict with the keys ``id``,
        ``title``, ``ext`` and ``url``.

        Raises ExtractorError if the URL does not match _VALID_URL, if the
        title or the asset XML link cannot be found in the page, or if no
        usable MP4 progressive download is listed.
        """
        # determine video id from url
        m = re.match(self._VALID_URL, url)
        if m is None:
            raise ExtractorError(u'invalid URL: %s' % url)
        video_id = m.group('video_id')
        domain = m.group('domain')
        mediatype = m.group('type')

        # determine title and media streams from webpage
        html = self._download_webpage(url, video_id)

        t = re.search(self._TITLE, html)
        if not t:
            raise ExtractorError(u'no title found')
        title = t.group('title1') + t.group('title2')

        m = re.search(self._MEDIA_XML, html)
        if not m:
            raise ExtractorError(u'no xml found')
        xmlurl = m.group('xmlurl')

        xml = self._download_webpage(domain + xmlurl, video_id, 'download XML')
        # Collapse the document to one line, then put each <asset> element on
        # a line of its own so the ``.*`` parts of the asset patterns cannot
        # run across asset boundaries.
        xml = xml.replace('\n', '').replace('\r', '')
        xml = xml.replace('<asset>', '\n<asset>').replace('</asset>', '</asset>\n')

        if mediatype == "video":
            streams = [mo.groupdict() for mo in re.finditer(self._MEDIA_STREAM_VIDEO, xml)]
            if not streams:
                raise ExtractorError(u'no media found')
            # highest quality == widest frame
            stream = self._best_http_stream(streams, 'frameWidth')
        else:
            streams = [mo.groupdict() for mo in re.finditer(self._MEDIA_STREAM_AUDIO, xml)]
            if not streams:
                raise ExtractorError(u'no media found')
            # default media type (MP4), highest quality == highest bitrate
            stream = self._best_http_stream(streams, 'bitrateAudio', media_type='MP4')

        # Only progressive HTTP(S) downloads are selected above, so the RTMP
        # fields captured by the patterns are not used here.
        media_url = stream['progressiveDownloadUrl']
        if not media_url.endswith('.mp4'):
            # 'ext' is fixed to mp4 below; fail loudly rather than mislabel.
            raise ExtractorError(u'unexpected media format: %s' % media_url)

        info = {'id': video_id, 'title': title, 'ext': 'mp4', 'url': media_url}
        return [info]

Loading…
Cancel
Save