[3sat] Add support (Fixes #1001)

12 years ago · 73e79f2a1b
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -11,6 +11,7 @@ from .comedycentral import ComedyCentralIE
 from .cspan import CSpanIE
 from .dailymotion import DailymotionIE
 from .depositfiles import DepositFilesIE
 from .dreisat import DreiSatIE
 from .eighttracks import EightTracksIE
 from .escapist import EscapistIE
 from .facebook import FacebookIE
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -36,6 +36,8 @@ class InfoExtractor(object):
    The following fields are optional:

    format:         The video format, defaults to ext (used for --get-format)
    thumbnails:     A list of dictionaries (with the entries "resolution" and
                    "url") for the varying thumbnails
    thumbnail:      Full URL to a video thumbnail image.
    description:    One-line video description.
    uploader:       Full name of the video uploader.
--- a/youtube_dl/extractor/dreisat.py
+++ b/youtube_dl/extractor/dreisat.py
@@ -0,0 +1,85 @@
 # coding: utf-8

 import re
 import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
    determine_ext,
    ExtractorError,
    unified_strdate,
 )


 class DreiSatIE(InfoExtractor):
    """Information extractor for videos of the 3sat Mediathek (3sat.de).

    The numeric ``obj`` parameter of a Mediathek URL is used as the video id.
    All metadata (title, description, thumbnails, formats) is read from the
    ``beitragsDetails`` XML service of the site.
    """
    IE_NAME = '3sat'
    # The dots in the host and script name are escaped so that they only
    # match a literal '.', not an arbitrary character.
    _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/index\.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
    _TEST = {
        u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
        u'file': u'36983.webm',
        u'md5': u'57c97d0469d71cf874f6815aa2b7c944',
        u'info_dict': {
            u"title": u"Kaffeeland Schweiz",
            u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...",
            u"uploader": u"3sat",
            u"upload_date": u"20130622"
        }
    }

    def _real_extract(self, url):
        """Extract the video information for a 3sat Mediathek URL.

        Downloads the XML details document for the video id found in *url*
        and builds the info dictionary from it.  The formats are sorted from
        worst to best; the last one is used as the default download URL.

        Raises ExtractorError if the details document lists no usable format.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
        details_xml = self._download_webpage(details_url, video_id, note=u'Downloading video details')
        details_doc = xml.etree.ElementTree.fromstring(details_xml.encode('utf-8'))

        def _dimension(value):
            # A missing or non-numeric dimension becomes None instead of
            # aborting the whole extraction.
            try:
                return int(value)
            except ValueError:
                return None

        thumbnails = []
        for te in details_doc.findall('.//teaserimage'):
            # The 'key' attribute is split on 'x' into width and height
            # (presumably of the form '<width>x<height>' - confirm against
            # the service output).
            resolution = te.attrib['key']
            width, _, height = resolution.partition('x')
            thumbnails.append({
                'resolution': resolution,
                'width': _dimension(width),
                'height': _dimension(height),
                'url': te.text,
            })

        information_el = details_doc.find('.//information')
        video_title = information_el.find('./title').text
        video_description = information_el.find('./detail').text

        details_el = details_doc.find('.//details')
        video_uploader = details_el.find('./channel').text
        upload_date = unified_strdate(details_el.find('./airtime').text)

        formats = []
        for fe in details_doc.findall('.//formitaet'):
            format_url = fe.find('./url').text
            # Entries pointing to metafilegenerator.de are skipped.
            if format_url.startswith('http://www.metafilegenerator.de/'):
                continue
            formats.append({
                'format_id': fe.attrib['basetype'],
                'width': int(fe.find('./width').text),
                'height': int(fe.find('./height').text),
                'url': format_url,
                'filesize': int(fe.find('./filesize').text),
                'video_bitrate': int(fe.find('./videoBitrate').text),
                '3sat_qualityname': fe.find('./quality').text,
            })

        if not formats:
            raise ExtractorError(u'No video formats found for %s' % video_id)

        # Quality names ordered from worst to best.
        quality_order = ['low', 'med', 'high', 'veryhigh']

        def _sortkey(fmt):
            quality = fmt['3sat_qualityname']
            # Unknown quality names rank below all known ones rather than
            # raising a ValueError.
            qidx = quality_order.index(quality) if quality in quality_order else -1
            # NOTE(review): despite its name, this ranks URLs containing
            # 'rtmp' above the others of the same quality - confirm intent.
            prefer_http = 1 if 'rtmp' in fmt['url'] else 0
            return (qidx, prefer_http, fmt['video_bitrate'])
        formats.sort(key=_sortkey)

        info = {
            'id': video_id,
            'title': video_title,
            'formats': formats,
            'description': video_description,
            'thumbnails': thumbnails,
            'uploader': video_uploader,
            'upload_date': upload_date,
        }
        # 'thumbnail' is only set when at least one thumbnail is listed.
        if thumbnails:
            info['thumbnail'] = thumbnails[-1]['url']

        # TODO: Remove when #980 has been merged
        info['url'] = formats[-1]['url']
        info['ext'] = determine_ext(formats[-1]['url'])

        return self.video_result(info)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -623,7 +623,7 @@ def unified_strdate(date_str):
    date_str = date_str.replace(',',' ')
    # %z (UTC offset) is only supported in python>=3.2
    date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
    format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S']
    format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S', '%d.%m.%Y %H:%M']
    for expression in format_expressions:
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
@@ -631,6 +631,13 @@ def unified_strdate(date_str):
            pass
    return upload_date

 def determine_ext(url, default_ext=u'unknown_video'):
    """Guess the file extension from a URL.

    The query string and the fragment are ignored; the text after the last
    '.' of the remaining part is returned if it consists only of ASCII
    letters and digits.  Otherwise default_ext is returned.
    """
    # Drop the fragment first so that 'video.mp4#t=10' still yields 'mp4'.
    path = url.partition(u'#')[0].partition(u'?')[0]
    guess = path.rpartition(u'.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    return default_ext

 def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or