Browse Source

Merge pull request #7322 from remitamine/vgtv

[vgtv] extract videos from FTV, Aftenposten, Aftonbladet using VGTVIE
totalwebcasting
remitamine 9 years ago
parent
commit
31d9ea4a3e
4 changed files with 122 additions and 82 deletions
  1. +0
    -1
      youtube_dl/extractor/__init__.py
  2. +0
    -23
      youtube_dl/extractor/aftenposten.py
  3. +113
    -53
      youtube_dl/extractor/vgtv.py
  4. +9
    -5
      youtube_dl/extractor/xstream.py

+ 0
- 1
youtube_dl/extractor/__init__.py View File

@ -15,7 +15,6 @@ from .adobetv import (
AdobeTVVideoIE, AdobeTVVideoIE,
) )
from .adultswim import AdultSwimIE from .adultswim import AdultSwimIE
from .aftenposten import AftenpostenIE
from .aftonbladet import AftonbladetIE from .aftonbladet import AftonbladetIE
from .airmozilla import AirMozillaIE from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE from .aljazeera import AlJazeeraIE


+ 0
- 23
youtube_dl/extractor/aftenposten.py View File

@ -1,23 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class AftenpostenIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/(?:#!/)?video/(?P<id>\d+)'
_TEST = {
'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
'md5': 'fd828cd29774a729bf4d4425fe192972',
'info_dict': {
'id': '21039',
'ext': 'mov',
'title': 'TRAILER: "Sweatshop" - I can´t take any more',
'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
'timestamp': 1416927969,
'upload_date': '20141125',
}
}
def _real_extract(self, url):
return self.url_result('xstream:ap:%s' % self._match_id(url), 'Xstream')

+ 113
- 53
youtube_dl/extractor/vgtv.py View File

@ -4,26 +4,48 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from .xstream import XstreamIE
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
float_or_none, float_or_none,
) )
class VGTVIE(InfoExtractor):
IE_DESC = 'VGTV and BTTV'
class VGTVIE(XstreamIE):
IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet'
_HOST_TO_APPNAME = {
'vgtv.no': 'vgtv',
'bt.no/tv': 'bttv',
'aftenbladet.no/tv': 'satv',
'fvn.no/fvntv': 'fvntv',
'aftenposten.no/webtv': 'aptv',
}
_APP_NAME_TO_VENDOR = {
'vgtv': 'vgtv',
'bttv': 'bt',
'satv': 'sa',
'fvntv': 'fvn',
'aptv': 'ap',
}
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
(?:
vgtv:|
http://(?:www\.)?
(?:https?://(?:www\.)?
(?P<host>
%s
) )
(?P<host>vgtv|bt)
/
(?: (?:
:|
\.no/(?:tv/)?\#!/(?:video|live)/
)
(?P<id>[0-9]+)
'''
\#!/(?:video|live)/|
embed?.*id=
)|
(?P<appname>
%s
):)
(?P<id>\d+)
''' % ('|'.join(_HOST_TO_APPNAME.keys()), '|'.join(_APP_NAME_TO_VENDOR.keys()))
_TESTS = [ _TESTS = [
{ {
# streamType: vod # streamType: vod
@ -59,17 +81,18 @@ class VGTVIE(InfoExtractor):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Video is no longer available',
}, },
{ {
# streamType: live
# streamType: wasLive
'url': 'http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla', 'url': 'http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla',
'info_dict': { 'info_dict': {
'id': '113063', 'id': '113063',
'ext': 'flv',
'title': 're:^DIREKTE: V75 fra Solvalla [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'ext': 'mp4',
'title': 'V75 fra Solvalla 30.05.15',
'description': 'md5:b3743425765355855f88e096acc93231', 'description': 'md5:b3743425765355855f88e096acc93231',
'thumbnail': 're:^https?://.*\.jpg', 'thumbnail': 're:^https?://.*\.jpg',
'duration': 0,
'duration': 25966,
'timestamp': 1432975582, 'timestamp': 1432975582,
'upload_date': '20150530', 'upload_date': '20150530',
'view_count': int, 'view_count': int,
@ -79,6 +102,20 @@ class VGTVIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
}, },
{
'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
'md5': 'fd828cd29774a729bf4d4425fe192972',
'info_dict': {
'id': '21039',
'ext': 'mov',
'title': 'TRAILER: «SWEATSHOP» - I can´t take any more',
'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
'duration': 66,
'timestamp': 1417002452,
'upload_date': '20141126',
'view_count': int,
}
},
{ {
'url': 'http://www.bt.no/tv/#!/video/100250/norling-dette-er-forskjellen-paa-1-divisjon-og-eliteserien', 'url': 'http://www.bt.no/tv/#!/video/100250/norling-dette-er-forskjellen-paa-1-divisjon-og-eliteserien',
'only_matching': True, 'only_matching': True,
@ -89,21 +126,27 @@ class VGTVIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
host = mobj.group('host') host = mobj.group('host')
HOST_WEBSITES = {
'vgtv': 'vgtv',
'bt': 'bttv',
}
appname = self._HOST_TO_APPNAME[host] if host else mobj.group('appname')
vendor = self._APP_NAME_TO_VENDOR[appname]
data = self._download_json( data = self._download_json(
'http://svp.vg.no/svp/api/v1/%s/assets/%s?appName=%s-website' 'http://svp.vg.no/svp/api/v1/%s/assets/%s?appName=%s-website'
% (host, video_id, HOST_WEBSITES[host]),
% (vendor, video_id, appname),
video_id, 'Downloading media JSON') video_id, 'Downloading media JSON')
if data.get('status') == 'inactive': if data.get('status') == 'inactive':
raise ExtractorError( raise ExtractorError(
'Video %s is no longer available' % video_id, expected=True) 'Video %s is no longer available' % video_id, expected=True)
info = {
'formats': [],
}
if len(video_id) == 5:
if appname == 'bttv':
info = self._extract_video_info('btno', video_id)
elif appname == 'aptv':
info = self._extract_video_info('ap', video_id)
streams = data['streamUrls'] streams = data['streamUrls']
stream_type = data.get('streamType') stream_type = data.get('streamType')
@ -111,48 +154,53 @@ class VGTVIE(InfoExtractor):
hls_url = streams.get('hls') hls_url = streams.get('hls')
if hls_url: if hls_url:
formats.extend(self._extract_m3u8_formats(
hls_url, video_id, 'mp4', m3u8_id='hls'))
m3u8_formats = self._extract_m3u8_formats(
hls_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
hds_url = streams.get('hds') hds_url = streams.get('hds')
# wasLive hds are always 404 # wasLive hds are always 404
if hds_url and stream_type != 'wasLive': if hds_url and stream_type != 'wasLive':
formats.extend(self._extract_f4m_formats(
hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
video_id, f4m_id='hds'))
f4m_formats = self._extract_f4m_formats(
hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', video_id, f4m_id='hds', fatal=False)
if f4m_formats:
formats.extend(f4m_formats)
mp4_urls = streams.get('pseudostreaming') or []
mp4_url = streams.get('mp4') mp4_url = streams.get('mp4')
if mp4_url: if mp4_url:
_url = hls_url or hds_url
MP4_URL_TEMPLATE = '%s/%%s.%s' % (mp4_url.rpartition('/')[0], mp4_url.rpartition('.')[-1])
for mp4_format in _url.split(','):
m = re.search('(?P<width>\d+)_(?P<height>\d+)_(?P<vbr>\d+)', mp4_format)
if not m:
continue
width = int(m.group('width'))
height = int(m.group('height'))
vbr = int(m.group('vbr'))
formats.append({
'url': MP4_URL_TEMPLATE % mp4_format,
'format_id': 'mp4-%s' % vbr,
'width': width,
'height': height,
'vbr': vbr,
'preference': 1,
mp4_urls.append(mp4_url)
for mp4_url in mp4_urls:
format_info = {
'url': mp4_url,
}
mobj = re.search('(\d+)_(\d+)_(\d+)', mp4_url)
if mobj:
tbr = int(mobj.group(3))
format_info.update({
'width': int(mobj.group(1)),
'height': int(mobj.group(2)),
'tbr': tbr,
'format_id': 'mp4-%s' % tbr,
}) })
self._sort_formats(formats)
formats.append(format_info)
info['formats'].extend(formats)
self._sort_formats(info['formats'])
return {
info.update({
'id': video_id, 'id': video_id,
'title': self._live_title(data['title']),
'title': self._live_title(data['title']) if stream_type == 'live' else data['title'],
'description': data['description'], 'description': data['description'],
'thumbnail': data['images']['main'] + '?t[]=900x506q80', 'thumbnail': data['images']['main'] + '?t[]=900x506q80',
'timestamp': data['published'], 'timestamp': data['published'],
'duration': float_or_none(data['duration'], 1000), 'duration': float_or_none(data['duration'], 1000),
'view_count': data['displays'], 'view_count': data['displays'],
'formats': formats,
'is_live': True if stream_type == 'live' else False, 'is_live': True if stream_type == 'live' else False,
}
})
return info
class BTArticleIE(InfoExtractor): class BTArticleIE(InfoExtractor):
@ -161,7 +209,7 @@ class BTArticleIE(InfoExtractor):
_VALID_URL = 'http://(?:www\.)?bt\.no/(?:[^/]+/)+(?P<id>[^/]+)-\d+\.html' _VALID_URL = 'http://(?:www\.)?bt\.no/(?:[^/]+/)+(?P<id>[^/]+)-\d+\.html'
_TEST = { _TEST = {
'url': 'http://www.bt.no/nyheter/lokalt/Kjemper-for-internatet-1788214.html', 'url': 'http://www.bt.no/nyheter/lokalt/Kjemper-for-internatet-1788214.html',
'md5': 'd055e8ee918ef2844745fcfd1a4175fb',
'md5': '2acbe8ad129b3469d5ae51b1158878df',
'info_dict': { 'info_dict': {
'id': '23199', 'id': '23199',
'ext': 'mp4', 'ext': 'mp4',
@ -178,15 +226,15 @@ class BTArticleIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
webpage = self._download_webpage(url, self._match_id(url)) webpage = self._download_webpage(url, self._match_id(url))
video_id = self._search_regex( video_id = self._search_regex(
r'SVP\.Player\.load\(\s*(\d+)', webpage, 'video id')
return self.url_result('vgtv:bt:%s' % video_id, 'VGTV')
r'<video[^>]+data-id="(\d+)"', webpage, 'video id')
return self.url_result('bttv:%s' % video_id, 'VGTV')
class BTVestlendingenIE(InfoExtractor): class BTVestlendingenIE(InfoExtractor):
IE_NAME = 'bt:vestlendingen' IE_NAME = 'bt:vestlendingen'
IE_DESC = 'Bergens Tidende - Vestlendingen' IE_DESC = 'Bergens Tidende - Vestlendingen'
_VALID_URL = 'http://(?:www\.)?bt\.no/spesial/vestlendingen/#!/(?P<id>\d+)' _VALID_URL = 'http://(?:www\.)?bt\.no/spesial/vestlendingen/#!/(?P<id>\d+)'
_TEST = {
_TESTS = [{
'url': 'http://www.bt.no/spesial/vestlendingen/#!/86588', 'url': 'http://www.bt.no/spesial/vestlendingen/#!/86588',
'md5': 'd7d17e3337dc80de6d3a540aefbe441b', 'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
'info_dict': { 'info_dict': {
@ -197,7 +245,19 @@ class BTVestlendingenIE(InfoExtractor):
'timestamp': 1430473209, 'timestamp': 1430473209,
'upload_date': '20150501', 'upload_date': '20150501',
}, },
}
'skip': '404 Error',
}, {
'url': 'http://www.bt.no/spesial/vestlendingen/#!/86255',
'md5': 'a2893f8632e96389f4bdf36aa9463ceb',
'info_dict': {
'id': '86255',
'ext': 'mov',
'title': 'Du må tåle å fryse og være sulten',
'description': 'md5:b8046f4d022d5830ddab04865791d063',
'upload_date': '20150321',
'timestamp': 1426942023,
},
}]
def _real_extract(self, url): def _real_extract(self, url):
return self.url_result('xstream:btno:%s' % self._match_id(url), 'Xstream')
return self.url_result('bttv:%s' % self._match_id(url), 'VGTV')

+ 9
- 5
youtube_dl/extractor/xstream.py View File

@ -42,11 +42,7 @@ class XstreamIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
partner_id = mobj.group('partner_id')
video_id = mobj.group('id')
def _extract_video_info(self, partner_id, video_id):
data = self._download_xml( data = self._download_xml(
'http://frontend.xstream.dk/%s/feed/video/?platform=web&id=%s' 'http://frontend.xstream.dk/%s/feed/video/?platform=web&id=%s'
% (partner_id, video_id), % (partner_id, video_id),
@ -97,6 +93,7 @@ class XstreamIE(InfoExtractor):
formats.append({ formats.append({
'url': link.get('href'), 'url': link.get('href'),
'format_id': link.get('rel'), 'format_id': link.get('rel'),
'preference': 1,
}) })
thumbnails = [{ thumbnails = [{
@ -113,3 +110,10 @@ class XstreamIE(InfoExtractor):
'formats': formats, 'formats': formats,
'thumbnails': thumbnails, 'thumbnails': thumbnails,
} }
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
partner_id = mobj.group('partner_id')
video_id = mobj.group('id')
return self._extract_video_info(partner_id, video_id)

Loading…
Cancel
Save