[dramafever] Improve and simplify

10 years ago · 0029071adb
--- a/youtube_dl/extractor/dramafever.py
+++ b/youtube_dl/extractor/dramafever.py
@ -1,104 +1,111 @@
 # encoding: utf-8
 from __future__ import unicode_literals

 import re
 import itertools

 from .common import InfoExtractor
 from ..compat import (
    compat_HTTPError,
    compat_urlparse,
 )
 from ..utils import (
    ExtractorError,
    clean_html,
    determine_ext,
    int_or_none,
    parse_iso8601,
 )


 class DramaFeverIE(InfoExtractor):
    # Extractor for single-episode pages; _VALID_URL captures the
    # "<series>/<episode>" number pair from the URL path as the id.
    # NOTE(review): the attributes below appear to interleave the removed and
    # added lines of the commit shown in the diff header: _VALID_URL is
    # assigned twice, both `_TESTS = [{` and `_TEST = {` open the same test
    # entry, 'description' appears twice and both closers (`}]` and `}`) are
    # present. Confirm which variant is the intended one.
    IE_NAME = 'dramafever'
    _VALID_URL = r'^https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+/[0-9]+)/'
    _TESTS = [{
    _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+/[0-9]+)'
    _TEST = {
        'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/',
        'info_dict': {
            # The id is the URL's "4512/1" with the slash turned into a dot
            # (see _real_extract).
            'id': '4512.1',
            'ext': 'flv',
            'title': 'Cooking with Shin 4512.1',
            'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0',
            'thumbnail': 're:^https?://.*\.jpg',
            'timestamp': 1404336058,
            'upload_date': '20140702',
            'description': 'Served at all special occasions and featured in the hit drama Heirs, Shin cooks Red Bean Rice.',
            'duration': 343,
        }
    }]
    }

    def _real_extract(self, url):
        """Extract metadata, formats and subtitles for a single episode.

        The episode id is taken from ``url`` and used as the ``guid`` of the
        AMP episode feed; the returned info dict is assembled from that feed.

        NOTE(review): this body appears to interleave the removed and added
        lines of the commit shown in the diff header. As written, ``video_id``
        is assigned twice, the same feed URL is requested twice (``ep_json``
        and ``feed``), f4m sources are added to ``formats`` by both loops, and
        the returned dict repeats the ``'subtitles'`` key. Confirm which lines
        belong to the intended version.
        """
        # The URL id is "<series>/<episode>"; the feed guid uses a dot instead.
        video_id = self._match_id(url).replace("/", ".")

        consumer_secret = self._get_consumer_secret(video_id)
        video_id = self._match_id(url).replace('/', '.')

        # First feed request: any failure surfaces through errnote.
        ep_json = self._download_json(
            "http://www.dramafever.com/amp/episode/feed.json?guid=%s" % video_id,
            video_id, note='Downloading episode metadata',
            errnote="Video may not be available for your location")["channel"]["item"]
        # Second request for the same URL: an HTTP error is reported as a
        # geo restriction (expected=True), anything else is re-raised as is.
        try:
            feed = self._download_json(
                'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id,
                video_id, 'Downloading episode JSON')['channel']['item']
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError):
                raise ExtractorError(
                    'Currently unavailable in your country.', expected=True)
            raise

        # Strict lookups on ep_json: a missing key raises here.
        title = ep_json["media-group"]["media-title"]
        description = ep_json["media-group"]["media-description"]
        thumbnail = ep_json["media-group"]["media-thumbnail"]["@attributes"]["url"]
        duration = int(ep_json["media-group"]["media-content"][0]["@attributes"]["duration"])
        # upload_date is YYYYMMDD built from the leading date of pubDate, or
        # None when pubDate does not start with such a date.
        mobj = re.match(r"([0-9]{4})-([0-9]{2})-([0-9]{2})", ep_json["pubDate"])
        upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3) if mobj is not None else None
        media_group = feed.get('media-group', {})

        formats = []
        # Only sources whose URL contains '.f4m' are handled by this loop.
        for vid_format in ep_json["media-group"]["media-content"]:
            src = vid_format["@attributes"]["url"]
            if '.f4m' in src:
                formats.extend(self._extract_f4m_formats(src, video_id))

        # Dispatch on the URL extension: f4m and m3u8 manifests are expanded
        # by the matching helper, any other URL is added as a single format.
        # Entries without a URL are skipped.
        for media_content in media_group['media-content']:
            src = media_content.get('@attributes', {}).get('url')
            if not src:
                continue
            ext = determine_ext(src)
            if ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    src, video_id, f4m_id='hds'))
            elif ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    src, video_id, 'mp4', m3u8_id='hls'))
            else:
                formats.append({
                    'url': src,
                })
        self._sort_formats(formats)
        # extract_subtitles is not defined in this file; presumably it wraps
        # _get_subtitles - TODO confirm.
        video_subtitles = self.extract_subtitles(video_id, consumer_secret)

        # These assignments overwrite the values read from ep_json above,
        # using tolerant .get() lookups on the second feed response.
        title = media_group.get('media-title')
        description = media_group.get('media-description')
        # Duration is read from the first media-content entry only.
        duration = int_or_none(media_group['media-content'][0].get('@attributes', {}).get('duration'))
        thumbnail = self._proto_relative_url(
            media_group.get('media-thumbnail', {}).get('@attributes', {}).get('url'))
        # ' ' is passed as the date/time delimiter; presumably pubDate looks
        # like "YYYY-MM-DD HH:MM:SS" - TODO confirm.
        timestamp = parse_iso8601(feed.get('pubDate'), ' ')

        # One TTML entry per language; entries lacking lang or href are
        # skipped, and a repeated language overwrites the earlier entry.
        subtitles = {}
        for media_subtitle in media_group.get('media-subTitle', []):
            lang = media_subtitle.get('@attributes', {}).get('lang')
            href = media_subtitle.get('@attributes', {}).get('href')
            if not lang or not href:
                continue
            subtitles[lang] = [{
                'ext': 'ttml',
                'url': href,
            }]

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'timestamp': timestamp,
            'duration': duration,
            'formats': formats,
            # A repeated key in a dict literal keeps the last value, so the
            # feed-based `subtitles` below replaces `video_subtitles`.
            'subtitles': video_subtitles,
            'subtitles': subtitles,
        }

    def _get_consumer_secret(self, video_id):
        """Scrape the API consumer secret from a DramaFever static script.

        Downloads jquery.threadedcomments.js and returns the alphanumeric
        value captured from its ``'cs': '...'`` entry. ``video_id`` is only
        forwarded to the download helper.
        """
        script_url = "http://www.dramafever.com/static/126960d/v2/js/plugins/jquery.threadedcomments.js"
        script_body = self._download_webpage(script_url, video_id)
        secret_pattern = r"'cs': '([0-9a-zA-Z]+)'"
        return self._search_regex(secret_pattern, script_body, "cs")

    def _get_episodes(self, series_id, consumer_secret, episode_filter=None):
        """Collect the episodes of a series from the paginated episode API.

        Pages are requested one after another, starting at page 1, until the
        page counter exceeds the ``num_pages`` value reported by the most
        recent response. The first page is always requested.

        ``episode_filter``, when given, is called with each episode dict and
        only episodes for which it returns a true value are kept. Returns the
        kept episodes as a list, in API order.
        """
        page_size = 60

        collected = []
        page_number = 1
        while True:
            page_url = "http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d" % \
                       (consumer_secret, series_id, page_size, page_number)
            page = self._download_json(
                page_url, series_id, note="Downloading series json page #%d" % page_number)
            # Read the page count before touching the episodes, as the
            # response is trusted to carry both keys.
            total_pages = page['num_pages']
            for episode in page['value']:
                if episode_filter is None or episode_filter(episode):
                    collected.append(episode)
            page_number += 1
            if page_number > total_pages:
                break
        return collected

    def _get_subtitles(self, video_id, consumer_secret):
        """Look up the English SRT subtitle file for one episode.

        The series id is the part of ``video_id`` before the first dot. The
        series' episode list is filtered down to entries whose ``guid`` equals
        ``video_id``. Returns ``{'en': [{'url': ..., 'ext': 'srt'}]}`` when
        exactly one episode matches and its ``subfile`` is not the empty
        string, otherwise ``None``.
        """
        series_id = video_id.split(".")[0]

        def is_requested_episode(episode):
            return episode['guid'] == video_id

        matches = self._get_episodes(
            series_id, consumer_secret, episode_filter=is_requested_episode)

        # Anything other than a single unambiguous match yields no subtitles.
        if len(matches) != 1:
            return None
        subfile = matches[0]['subfile']
        if subfile == '':
            return None
        return {'en': [{'url': subfile, 'ext': 'srt'}]}


 class DramaFeverSeriesIE(DramaFeverIE):

 class DramaFeverSeriesIE(InfoExtractor):
    # Extractor for series pages; _real_extract returns a playlist whose
    # entries point at the individual episode URLs.
    # NOTE(review): this header appears to interleave the removed and added
    # lines of the commit shown in the diff header: there are two class
    # statements (different base classes), two _VALID_URL assignments, two
    # 'description' values per test, and a hunk header line sits inside
    # _TESTS where the second test's 'url' entry would be. Confirm which
    # variant is the intended one.
    IE_NAME = 'dramafever:series'
    _VALID_URL = r'^https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+)/\d*[a-zA-Z_][a-zA-Z0-9_]*/'
    _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+)(?:/(?:(?!\d).+)?)?$'
    _TESTS = [{
        'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/',
        'info_dict': {
            'id': '4512',
            'title': 'Cooking with Shin',
            'description': 'Professional chef and cooking instructor Shin Kim takes some of the delicious dishes featured in your favorite dramas and shows you how to make them right at home.',
            'description': 'md5:84a3f26e3cdc3fb7f500211b3593b5c1',
        },
        'playlist_count': 4,
    }, {
@ -106,25 +113,48 @@ class DramaFeverSeriesIE(DramaFeverIE):
        'info_dict': {
            'id': '124',
            'title': 'IRIS',
            'description': 'Lee Byung Hun and Kim Tae Hee star in this powerhouse drama and ratings megahit of action, intrigue and romance.',
            'description': 'md5:b3a30e587cf20c59bd1c01ec0ee1b862',
        },
        'playlist_count': 20,
    }]

    # Hard-coded fallback returned by _get_consumer_secret when main.js cannot
    # be downloaded or does not contain the secret.
    _CONSUMER_SECRET = 'DA59dtVXYLxajktV'
    # Number of episodes requested per page of the episode listing API.
    _PAGE_SIZE = 5  # max is 60 (see http://api.drama9.com/#get--api-4-episode-series-)

    def _get_consumer_secret(self, video_id):
        """Return the API consumer secret, preferring the one in main.js.

        main.js is downloaded non-fatally. If the download yields nothing,
        or the script has no ``var cs = '...'`` assignment, the class-level
        ``_CONSUMER_SECRET`` is returned instead. ``video_id`` is only
        forwarded to the download helper.
        """
        script = self._download_webpage(
            'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js',
            video_id, 'Downloading main.js', fatal=False)
        if script:
            # The regex search falls back to the built-in secret as well.
            return self._search_regex(
                r"var\s+cs\s*=\s*'([^']+)'", script,
                'consumer secret', default=self._CONSUMER_SECRET)
        return self._CONSUMER_SECRET

    def _real_extract(self, url):
        """Build a playlist of all episodes of the series addressed by ``url``.

        Series title and description come from the series query API, and the
        entries from the episode listing API. The result is returned through
        ``playlist_result`` with the series id, title and description.

        NOTE(review): this body appears to interleave the removed and added
        lines of the commit shown in the diff header. As written, the series
        metadata is requested twice (``series_json`` and ``series``), title
        and description are assigned twice, and ``entries`` is filled by both
        the ``_get_episodes`` loop and the paginated loop, so every episode
        would be listed twice. Confirm which lines belong to the intended
        version.
        """
        series_id = self._match_id(url)

        consumer_secret = self._get_consumer_secret(series_id)

        series_json = self._download_json(
            "http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s" % (consumer_secret, series_id),
            series_id, note='Downloading series metadata')["series"][series_id]
        series = self._download_json(
            'http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s'
            % (consumer_secret, series_id),
            series_id, 'Downloading series JSON')['series'][series_id]

        title = series_json["name"]
        description = series_json["description_short"]
        # These overwrite the two values above; the long description is
        # preferred, with 'description_short' as the fallback.
        title = clean_html(series['name'])
        description = clean_html(series.get('description') or series.get('description_short'))

        # NOTE(review): _get_episodes is defined on DramaFeverIE in this file,
        # so this call only resolves if this class inherits from it - confirm
        # against the intended base class.
        episodes = self._get_episodes(series_id, consumer_secret)
        entries = []
        for ep in episodes:
            entries.append(self.url_result(
                'http://www.dramafever.com%s' % ep['episode_url'], 'DramaFever', ep['guid']))
        # Walk the listing page by page, starting at 1.
        # NOTE(review): the loop ends only when page_num equals the reported
        # 'num_pages' exactly; a response reporting fewer than 1 page would
        # never satisfy this - confirm the API cannot return that.
        for page_num in itertools.count(1):
            episodes = self._download_json(
                'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d'
                % (consumer_secret, series_id, self._PAGE_SIZE, page_num),
                series_id, 'Downloading episodes JSON page #%d' % page_num)
            for episode in episodes.get('value', []):
                # episode_url is resolved against the series page URL.
                entries.append(self.url_result(
                    compat_urlparse.urljoin(url, episode['episode_url']),
                    'DramaFever', episode.get('guid')))
            if page_num == episodes['num_pages']:
                break

        return self.playlist_result(entries, series_id, title, description)