Browse Source

[tv2] Add extractor (#5724)

totalwebcasting
Sergey M․ 10 years ago
parent
commit
bc0f937b55
2 changed files with 94 additions and 0 deletions
  1. +1
    -0
      youtube_dl/extractor/__init__.py
  2. +93
    -0
      youtube_dl/extractor/tv2.py

+ 1
- 0
youtube_dl/extractor/__init__.py View File

@ -572,6 +572,7 @@ from .tumblr import TumblrIE
from .tunein import TuneInIE
from .turbo import TurboIE
from .tutv import TutvIE
from .tv2 import TV2IE
from .tv4 import TV4IE
from .tvigle import TvigleIE
from .tvp import TvpIE, TvpSeriesIE


+ 93
- 0
youtube_dl/extractor/tv2.py View File

@ -0,0 +1,93 @@
# encoding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
float_or_none,
parse_iso8601,
)
class TV2IE(InfoExtractor):
_VALID_URL = 'http://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
_TEST = {
'url': 'http://www.tv2.no/v/916509/',
'md5': '9cb9e3410b18b515d71892f27856e9b1',
'info_dict': {
'id': '916509',
'ext': 'flv',
'title': 'Se Gryttens hyllest av Steven Gerrard',
'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.',
'timestamp': 1431715610,
'upload_date': '20150515',
'duration': 156.967,
'view_count': int,
'categories': list,
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
formats = []
format_urls = []
for protocol in ('HDS', 'HLS'):
data = self._download_json(
'http://sumo.tv2.no/api/web/asset/%s/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % (video_id, protocol),
video_id, 'Downloading play JSON')['playback']
for item in data['items']['item']:
video_url = item.get('url')
if not video_url or video_url in format_urls:
continue
format_id = '%s-%s' % (protocol.lower(), item.get('mediaFormat'))
if not self._is_valid_url(video_url, video_id, format_id):
continue
format_urls.append(video_url)
ext = determine_ext(video_url)
if ext == 'f4m':
formats.extend(self._extract_f4m_formats(
video_url, video_id, f4m_id=format_id))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', m3u8_id=format_id))
elif ext == 'ism' or video_url.endswith('.ism/Manifest'):
pass
else:
formats.append({
'url': video_url,
'format_id': format_id,
'tbr': int_or_none(item.get('bitrate')),
'filesize': int_or_none(item.get('fileSize')),
})
self._sort_formats(formats)
asset = self._download_json(
'http://sumo.tv2.no/api/web/asset/%s.json' % video_id,
video_id, 'Downloading metadata JSON')['asset']
title = asset['title']
description = asset.get('description')
timestamp = parse_iso8601(asset.get('createTime'))
duration = float_or_none(asset.get('accurateDuration') or asset.get('duration'))
view_count = int_or_none(asset.get('views'))
categories = asset.get('keywords', '').split(',')
thumbnails = [{
'id': thumbnail.get('@type'),
'url': thumbnail.get('url'),
} for _, thumbnail in asset.get('imageVersions', {}).items()]
return {
'id': video_id,
'url': video_url,
'title': title,
'description': description,
'thumbnails': thumbnails,
'timestamp': timestamp,
'duration': duration,
'view_count': view_count,
'categories': categories,
'formats': formats,
}

Loading…
Cancel
Save