Merge branch 'akretz-master'

10 years ago · 4542535f94
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -439,7 +439,7 @@ class InfoExtractor(object):
        return video_info

    @staticmethod
    def playlist_result(entries, playlist_id=None, playlist_title=None):
    def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
        """Returns a playlist"""
        video_info = {'_type': 'playlist',
                      'entries': entries}
@ -447,6 +447,8 @@ class InfoExtractor(object):
            video_info['id'] = playlist_id
        if playlist_title:
            video_info['title'] = playlist_title
        if playlist_description:
            video_info['description'] = playlist_description
        return video_info

    def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
--- a/youtube_dl/extractor/prosiebensat1.py
+++ b/youtube_dl/extractor/prosiebensat1.py
@ -85,7 +85,7 @@ class ProSiebenSat1IE(InfoExtractor):
                'ext': 'mp4',
                'title': 'Im Interview: Kai Wiesinger',
                'description': 'md5:e4e5370652ec63b95023e914190b4eb9',
                'upload_date': '20140225',
                'upload_date': '20140203',
                'duration': 522.56,
            },
            'params': {
@ -100,7 +100,7 @@ class ProSiebenSat1IE(InfoExtractor):
                'ext': 'mp4',
                'title': 'Jagd auf Fertigkost im Elsthal - Teil 2',
                'description': 'md5:2669cde3febe9bce13904f701e774eb6',
                'upload_date': '20140225',
                'upload_date': '20141014',
                'duration': 2410.44,
            },
            'params': {
@ -152,12 +152,22 @@ class ProSiebenSat1IE(InfoExtractor):
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist',
            'info_dict': {
                'id': '439664',
                'title': 'Episode 8 - Ganze Folge - Playlist',
                'description': 'md5:63b8963e71f481782aeea877658dec84',
            },
            'playlist_count': 2,
        },
    ]

    _CLIPID_REGEXES = [
        r'"clip_id"\s*:\s+"(\d+)"',
        r'clipid: "(\d+)"',
        r'clip[iI]d=(\d+)',
        r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
    ]
    _TITLE_REGEXES = [
        r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
@ -178,11 +188,19 @@ class ProSiebenSat1IE(InfoExtractor):
        r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
        r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>',
    ]
    _PAGE_TYPE_REGEXES = [
        r'<meta name="page_type" content="([^"]+)">',
        r"'itemType'\s*:\s*'([^']*)'",
    ]
    _PLAYLIST_ID_REGEXES = [
        r'content[iI]d=(\d+)',
        r"'itemId'\s*:\s*'([^']*)'",
    ]
    _PLAYLIST_CLIP_REGEXES = [
        r'(?s)data-qvt=.+?<a href="([^"]+)"',
    ]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

    def _extract_clip(self, url, webpage):
        clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id')

        access_token = 'testclient'
@ -281,3 +299,31 @@ class ProSiebenSat1IE(InfoExtractor):
            'duration': duration,
            'formats': formats,
        }

    def _extract_playlist(self, url, webpage):
        playlist_id = self._html_search_regex(
            self._PLAYLIST_ID_REGEXES, webpage, 'playlist id')
        for regex in self._PLAYLIST_CLIP_REGEXES:
            playlist_clips = re.findall(regex, webpage)
            if playlist_clips:
                title = self._html_search_regex(
                    self._TITLE_REGEXES, webpage, 'title')
                description = self._html_search_regex(
                    self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)
                entries = [
                    self.url_result(
                        re.match('(.+?//.+?)/', url).group(1) + clip_path,
                        'ProSiebenSat1')
                    for clip_path in playlist_clips]
                return self.playlist_result(entries, playlist_id, title, description)

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        page_type = self._search_regex(
            self._PAGE_TYPE_REGEXES, webpage,
            'page type', default='clip').lower()
        if page_type == 'clip':
            return self._extract_clip(url, webpage)
        elif page_type == 'playlist':
            return self._extract_playlist(url, webpage)