Add support for tou.tv (Fixes #1792)

11 years ago · 5904088811
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -133,6 +133,7 @@ from .techtalks import TechTalksIE
 from .ted import TEDIE
 from .tf1 import TF1IE
 from .thisav import ThisAVIE
 from .toutv import TouTvIE
 from .traileraddict import TrailerAddictIE
 from .trilulilu import TriluliluIE
 from .tube8 import Tube8IE
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -350,6 +350,17 @@ class InfoExtractor(object):
        if secure: regexes = self._og_regexes('video:secure_url') + regexes
        return self._html_search_regex(regexes, html, name, **kargs)

    def _html_search_meta(self, name, html, display_name=None):
        if display_name is None:
            display_name = name
        return self._html_search_regex(
            r'''(?ix)<meta(?=[^>]+(?:name|property)=["\']%s["\'])
                    [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
            html, display_name, fatal=False)

    def _dc_search_uploader(self, html):
        return self._html_search_meta('dc.creator', html, 'uploader')

    def _rta_search(self, html):
        # See http://www.rtalabel.org/index.php?content=howtofaq#single
        if re.search(r'(?ix)<meta\s+name="rating"\s+'
@ -358,6 +369,23 @@ class InfoExtractor(object):
            return 18
        return 0

    def _media_rating_search(self, html):
        # See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/
        rating = self._html_search_meta('rating', html)

        if not rating:
            return None

        RATING_TABLE = {
            'safe for kids': 0,
            'general': 8,
            '14 years': 14,
            'mature': 17,
            'restricted': 19,
        }
        return RATING_TABLE.get(rating.lower(), None)



 class SearchInfoExtractor(InfoExtractor):
    """
--- a/youtube_dl/extractor/toutv.py
+++ b/youtube_dl/extractor/toutv.py
@ -0,0 +1,75 @@
 # coding: utf-8
 import re
 import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    unified_strdate,
 )


 class TouTvIE(InfoExtractor):
    """Extractor for episode pages on www.tou.tv.

    The episode page supplies the metadata (title, description, rating,
    duration, release date) and a media ID; the media ID is then resolved
    to a stream URL through release.theplatform.com.
    """
    IE_NAME = u'tou.tv'
    _VALID_URL = r'https?://www\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/(?P<episode>S[0-9]+E[0-9]+)))'

    _TEST = {
        u'url': u'http://www.tou.tv/30-vies/S04E41',
        u'file': u'30-vies_S04E41.mp4',
        u'info_dict': {
            u'title': u'30 vies Saison 4 / Épisode 41',
            u'description': u'md5:da363002db82ccbe4dafeb9cab039b09',
            u'age_limit': 8,
            u'uploader': u'Groupe des Nouveaux Médias',
            u'duration': 1296,
            u'upload_date': u'20131118',
            u'thumbnail': u'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg',
        },
        u'params': {
            u'skip_download': True,  # Requires rtmpdump
        },
        # NOTE(review): the key is spelled 'xskip', not 'skip' -- presumably
        # to keep the test enabled while developing; confirm before merging.
        u'xskip': 'Only available in Canada'
    }

    def _real_extract(self, url):
        """Return the info dict for the episode at *url*.

        Raises ExtractorError when the stream list contains no usable video
        URL, or (with expected=True) when the service only offers its
        'Unavailable.flv' placeholder.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        media_id = self._search_regex(
            r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')

        # TODO test from de (presumably: check behaviour when the request
        # comes from Germany, i.e. from outside Canada)
        streams_url = u'http://release.theplatform.com/content.select?pid=' + media_id
        streams_webpage = self._download_webpage(
            streams_url, video_id, note=u'Downloading stream list')

        streams_doc = xml.etree.ElementTree.fromstring(
            streams_webpage.encode('utf-8'))
        # First <url> that is not an ad.  Empty <url/> elements have
        # text None and are skipped; the None default avoids leaking a bare
        # StopIteration when nothing qualifies.
        video_url = next(
            (n.text
             for n in streams_doc.findall('.//choice/url')
             if n.text and u'//ad.doubleclick' not in n.text),
            None)
        if video_url is None:
            raise ExtractorError(
                u'Unable to find a video URL in the stream list')
        if video_url.endswith('/Unavailable.flv'):
            raise ExtractorError(
                u'Access to this video is blocked from outside of Canada',
                expected=True)

        duration_str = self._html_search_meta(
            'video:duration', webpage, u'duration')
        duration = None
        if duration_str:
            try:
                duration = int(duration_str)
            except ValueError:
                # Optional metadata: a malformed value must not abort
                # the whole extraction.
                duration = None
        upload_date_str = self._html_search_meta(
            'video:release_date', webpage, u'upload date')
        upload_date = unified_strdate(upload_date_str) if upload_date_str else None

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'url': video_url,
            'description': self._og_search_description(webpage),
            'uploader': self._dc_search_uploader(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'age_limit': self._media_rating_search(webpage),
            'duration': duration,
            'upload_date': upload_date,
            'ext': 'mp4',
        }
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -734,6 +734,8 @@ def unified_strdate(date_str):
        '%Y/%m/%d %H:%M:%S',
        '%d.%m.%Y %H:%M',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
        '%Y-%m-%dT%H:%M:%S',
    ]
    for expression in format_expressions: