[ooyala] extract more formats and metadata

9 years ago · 90bddb6cdd
--- a/youtube_dl/extractor/ooyala.py
+++ b/youtube_dl/extractor/ooyala.py
@ -1,108 +1,64 @@
 from __future__ import unicode_literals
 import re
 import json
 import base64

 from .common import InfoExtractor
 from ..utils import (
    unescapeHTML,
    ExtractorError,
    determine_ext,
    int_or_none,
    float_or_none,
 )


 class OoyalaBaseIE(InfoExtractor):

    def _extract_result(self, info, more_info):
        embedCode = info['embedCode']
        video_url = info.get('ipad_url') or info['url']

        if determine_ext(video_url) == 'm3u8':
            formats = self._extract_m3u8_formats(video_url, embedCode, ext='mp4')
        else:
            formats = [{
                'url': video_url,
                'ext': 'mp4',
            }]

        return {
            'id': embedCode,
            'title': unescapeHTML(info['title']),
            'formats': formats,
            'description': unescapeHTML(more_info['description']),
            'thumbnail': more_info['promo'],
    def _extract(self, player_url, video_id):
        print(player_url)
        content_tree = self._download_json(player_url, video_id)['content_tree']
        metadata = content_tree[list(content_tree)[0]]
        embed_code = metadata['embed_code']
        pcode = metadata.get('asset_pcode') or embed_code
        video_info = {
            'id': embed_code,
            'title': metadata['title'],
            'description': metadata.get('description'),
            'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'),
            'duration': int_or_none(metadata.get('duration')),
        }

    def _extract(self, player_url, video_id):
        player = self._download_webpage(player_url, video_id)
        mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
                                        player, 'mobile player url')
        # Looks like some videos are only available for particular devices
        # (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0
        # is only available for ipad)
        # Working around with fetching URLs for all the devices found starting with 'unknown'
        # until we succeed or eventually fail for each device.
        devices = re.findall(r'device\s*=\s*"([^"]+)";', player)
        devices.remove('unknown')
        devices.insert(0, 'unknown')
        for device in devices:
            mobile_player = self._download_webpage(
                '%s&device=%s' % (mobile_url, device), video_id,
                'Downloading mobile player JS for %s device' % device)
            videos_info = self._search_regex(
                r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
                mobile_player, 'info', fatal=False, default=None)
            if videos_info:
                break

        if not videos_info:
            formats = []
        formats = []
        for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'):
            auth_data = self._download_json(
                'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?domain=www.example.org&supportedFormats=mp4,webm' % (video_id, video_id),
                video_id)
                'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?domain=www.example.org&supportedFormats=%s' % (pcode, embed_code, supported_format),
                video_id, 'Downloading %s JSON' % supported_format)

            cur_auth_data = auth_data['authorization_data'][video_id]
            cur_auth_data = auth_data['authorization_data'][embed_code]

            for stream in cur_auth_data['streams']:
                formats.append({
                    'url': base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8'),
                    'ext': stream.get('delivery_type'),
                    'format': stream.get('video_codec'),
                    'format_id': stream.get('profile'),
                    'width': int_or_none(stream.get('width')),
                    'height': int_or_none(stream.get('height')),
                    'abr': int_or_none(stream.get('audio_bitrate')),
                    'vbr': int_or_none(stream.get('video_bitrate')),
                })
            if formats:
                return {
                    'id': video_id,
                    'formats': formats,
                    'title': 'Ooyala video',
                }

            if not cur_auth_data['authorized']:
                raise ExtractorError(cur_auth_data['message'], expected=True)

        if not videos_info:
            raise ExtractorError('Unable to extract info')
        videos_info = videos_info.replace('\\"', '"')
        videos_more_info = self._search_regex(
            r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"')
        videos_info = json.loads(videos_info)
        videos_more_info = json.loads(videos_more_info)

        if videos_more_info.get('lineup'):
            videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]
            return {
                '_type': 'playlist',
                'id': video_id,
                'title': unescapeHTML(videos_more_info['title']),
                'entries': videos,
            }
        else:
            return self._extract_result(videos_info[0], videos_more_info)
                url = base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8')
                delivery_type = stream['delivery_type']
                if delivery_type == 'remote_asset':
                    video_info['url'] = url
                    return video_info
                if delivery_type == 'hls':
                    formats.extend(self._extract_m3u8_formats(url, embed_code, 'mp4', 'm3u8_native', 0, m3u8_id='hls', fatal=False))
                elif delivery_type == 'hds':
                    formats.extend(self._extract_f4m_formats(url, embed_code, f4m_id='hds', fatal=False))
                else:
                    formats.append({
                        'url': url,
                        'ext': stream.get('delivery_type'),
                        'vcodec': stream.get('video_codec'),
                        'format_id': stream.get('profile'),
                        'width': int_or_none(stream.get('width')),
                        'height': int_or_none(stream.get('height')),
                        'abr': int_or_none(stream.get('audio_bitrate')),
                        'vbr': int_or_none(stream.get('video_bitrate')),
                        'fps': float_or_none(stream.get('framerate')),
                    })
        self._sort_formats(formats)

        video_info['formats'] = formats
        return video_info


 class OoyalaIE(OoyalaBaseIE):
@ -117,6 +73,7 @@ class OoyalaIE(OoyalaBaseIE):
                'ext': 'mp4',
                'title': 'Explaining Data Recovery from Hard Drives and SSDs',
                'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
                'duration': 853386,
            },
        }, {
            # Only available for ipad
@ -125,7 +82,7 @@ class OoyalaIE(OoyalaBaseIE):
                'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
                'ext': 'mp4',
                'title': 'Simulation Overview - Levels of Simulation',
                'description': '',
                'duration': 194948,
            },
        },
        {
@ -136,7 +93,8 @@ class OoyalaIE(OoyalaBaseIE):
            'info_dict': {
                'id': 'FiOG81ZTrvckcchQxmalf4aQj590qTEx',
                'ext': 'mp4',
                'title': 'Ooyala video',
                'title': 'Divide Tool Path.mp4',
                'duration': 204405,
            }
        }
    ]
@ -152,8 +110,8 @@ class OoyalaIE(OoyalaBaseIE):

    def _real_extract(self, url):
        embed_code = self._match_id(url)
        player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code
        return self._extract(player_url, embed_code)
        content_tree_url = 'http://player.ooyala.com/player_api/v1/content_tree/embed_code/%s/%s' % (embed_code, embed_code)
        return self._extract(content_tree_url, embed_code)


 class OoyalaExternalIE(OoyalaBaseIE):
@ -170,7 +128,7 @@ class OoyalaExternalIE(OoyalaBaseIE):
                        .*?&pcode=
                    )
                    (?P<pcode>.+?)
                    (&|$)
                    (?:&|$)
                    '''

    _TEST = {
@ -179,7 +137,7 @@ class OoyalaExternalIE(OoyalaBaseIE):
            'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
            'ext': 'mp4',
            'title': 'dm_140128_30for30Shorts___JudgingJewellv2',
            'description': '',
            'duration': 1302000,
        },
        'params': {
            # m3u8 download
@ -188,9 +146,6 @@ class OoyalaExternalIE(OoyalaBaseIE):
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        partner_id = mobj.group('partner_id')
        video_id = mobj.group('id')
        pcode = mobj.group('pcode')
        player_url = 'http://player.ooyala.com/player.js?externalId=%s:%s&pcode=%s' % (partner_id, video_id, pcode)
        return self._extract(player_url, video_id)
        partner_id, video_id, pcode = re.match(self._VALID_URL, url).groups()
        content_tree_url = 'http://player.ooyala.com/player_api/v1/content_tree/external_id/%s/%s:%s' % (pcode, partner_id, video_id)
        return self._extract(content_tree_url, video_id)