From f063a04f079f7af0078a02da39586b5e71a6c0b1 Mon Sep 17 00:00:00 2001 From: Magnus Kolstad Date: Fri, 5 Sep 2014 11:24:30 +0200 Subject: [PATCH 1/2] [dbtv] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/dbtv.py | 76 ++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 youtube_dl/extractor/dbtv.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e49ac3e52..c43dfd7ea 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -62,6 +62,7 @@ from .dailymotion import ( DailymotionUserIE, ) from .daum import DaumIE +from .dbtv import DBTVIE from .dotsub import DotsubIE from .dreisat import DreiSatIE from .drtv import DRTVIE diff --git a/youtube_dl/extractor/dbtv.py b/youtube_dl/extractor/dbtv.py new file mode 100644 index 000000000..cf76dbf05 --- /dev/null +++ b/youtube_dl/extractor/dbtv.py @@ -0,0 +1,76 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + +from ..utils import ( + ExtractorError +) + +class DBTVIE(InfoExtractor): + _VALID_URL = r'http://dbtv.no/(?P[0-9]+)/?(?P.*)$' + _TEST = { + 'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen', + 'md5': 'b89953ed25dacb6edb3ef6c6f430f8bc', + 'info_dict': { + 'id': '3649835190001', + 'ext': 'mp4', + 'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen', + 'description': 'md5:d681bf2bb7dd3503892cedb9c2d0e6f2', + 'thumbnail': 'http://gfx.dbtv.no/thumbs/still/33100.jpg', + 'timestamp': 1404039863, + 'upload_date': '20140629', + 'duration': 69544, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + # Download JSON file containing video info. + data = self._download_json('http://api.dbtv.no/discovery/%s' % video_id, video_id, 'Downloading media JSON') + # We only want the first video in the JSON API file. + video = data['playlist'][0] + + # Check for full HD video, else use the standard video URL + for i in range(0, len(video['renditions'])): + if int(video['renditions'][i]['width']) == 1280: + video_url = video['renditions'][i]['URL'] + break + else: + video_url = video['URL'] + + # Add access token to image or it will fail. + thumbnail = video['splash'] + + # Duration int. + duration = int(video['length']) + + # Timestamp is given in milliseconds. + timestamp = float(str(video['publishedAt'])[0:-3]) + + formats = [] + + # Video URL. + if video['URL'] is not None: + formats.append({ + 'url': video_url, + 'format_id': 'mp4', + 'ext': 'mp4' + }) + else: + raise ExtractorError('No download URL found for video: %s.' % video_id, expected=True) + + return { + 'id': video_id, + 'title': video['title'], + 'description': video['desc'], + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'duration': duration, + 'view_count': video['views'], + 'formats': formats, + } From 4d067a58ca6b4ba2bfe2c9cb17e0ab1ad9e70604 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 5 Sep 2014 19:53:53 +0700 Subject: [PATCH 2/2] [dbtv] Simplify, modernize, extract all formats --- youtube_dl/extractor/dbtv.py | 114 +++++++++++++++++------------------ 1 file changed, 56 insertions(+), 58 deletions(-) diff --git a/youtube_dl/extractor/dbtv.py b/youtube_dl/extractor/dbtv.py index cf76dbf05..1d3e2ff08 100644 --- a/youtube_dl/extractor/dbtv.py +++ b/youtube_dl/extractor/dbtv.py @@ -4,73 +4,71 @@ from __future__ import unicode_literals import re from .common import InfoExtractor - from ..utils import ( - ExtractorError + float_or_none, + int_or_none, + clean_html, ) + class DBTVIE(InfoExtractor): - _VALID_URL = r'http://dbtv.no/(?P[0-9]+)/?(?P.*)$' - _TEST = { - 'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen', - 'md5': 'b89953ed25dacb6edb3ef6c6f430f8bc', - 'info_dict': { - 'id': '3649835190001', - 'ext': 'mp4', - 'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen', - 'description': 'md5:d681bf2bb7dd3503892cedb9c2d0e6f2', - 'thumbnail': 'http://gfx.dbtv.no/thumbs/still/33100.jpg', - 'timestamp': 1404039863, - 'upload_date': '20140629', - 'duration': 69544, + _VALID_URL = r'http://dbtv\.no/(?P[0-9]+)#(?P.+)' + _TEST = { + 'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen', + 'md5': 'b89953ed25dacb6edb3ef6c6f430f8bc', + 'info_dict': { + 'id': '33100', + 'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen', + 'ext': 'mp4', + 'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen', + 'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0', + 'thumbnail': 're:https?://.*\.jpg$', + 'timestamp': 1404039863.438, + 'upload_date': '20140629', + 'duration': 69.544, + 'view_count': int, + 'categories': list, + } } - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - # Download JSON file containing video info. - data = self._download_json('http://api.dbtv.no/discovery/%s' % video_id, video_id, 'Downloading media JSON') - # We only want the first video in the JSON API file. - video = data['playlist'][0] + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') - # Check for full HD video, else use the standard video URL - for i in range(0, len(video['renditions'])): - if int(video['renditions'][i]['width']) == 1280: - video_url = video['renditions'][i]['URL'] - break - else: - video_url = video['URL'] + data = self._download_json( + 'http://api.dbtv.no/discovery/%s' % video_id, display_id) - # Add access token to image or it will fail. - thumbnail = video['splash'] + video = data['playlist'][0] - # Duration int. - duration = int(video['length']) + formats = [{ + 'url': f['URL'], + 'vcodec': f.get('container'), + 'width': int_or_none(f.get('width')), + 'height': int_or_none(f.get('height')), + 'vbr': float_or_none(f.get('rate'), 1000), + 'filesize': int_or_none(f.get('size')), + } for f in video['renditions'] if 'URL' in f] - # Timestamp is given in milliseconds. - timestamp = float(str(video['publishedAt'])[0:-3]) + if not formats: + for url_key, format_id in [('URL', 'mp4'), ('HLSURL', 'hls')]: + if url_key in video: + formats.append({ + 'url': video[url_key], + 'format_id': format_id, + }) - formats = [] + self._sort_formats(formats) - # Video URL. - if video['URL'] is not None: - formats.append({ - 'url': video_url, - 'format_id': 'mp4', - 'ext': 'mp4' - }) - else: - raise ExtractorError('No download URL found for video: %s.' % video_id, expected=True) - - return { - 'id': video_id, - 'title': video['title'], - 'description': video['desc'], - 'thumbnail': thumbnail, - 'timestamp': timestamp, - 'duration': duration, - 'view_count': video['views'], - 'formats': formats, - } + return { + 'id': video['id'], + 'display_id': display_id, + 'title': video['title'], + 'description': clean_html(video['desc']), + 'thumbnail': video.get('splash') or video.get('thumb'), + 'timestamp': float_or_none(video.get('publishedAt'), 1000), + 'duration': float_or_none(video.get('length'), 1000), + 'view_count': int_or_none(video.get('views')), + 'categories': video.get('tags'), + 'formats': formats, + }