Browse Source

[nextmedia] Add support for NextTV (壹電視)

master-ytdl-org
Yen Chi Hsuan 8 years ago
parent
commit
bc35ed3fb6
No known key found for this signature in database GPG Key ID: 7F902A182457CA23
3 changed files with 60 additions and 1 deletions
  1. +6
    -0
      ChangeLog
  2. +1
    -0
      youtube_dl/extractor/extractors.py
  3. +53
    -1
      youtube_dl/extractor/nextmedia.py

+ 6
- 0
ChangeLog View File

@ -1,3 +1,9 @@
version <unreleased>
Extractors
+ [nextmedia] Add support for NextTV (壹電視)
version 2017.01.22 version 2017.01.22
Extractors Extractors


+ 1
- 0
youtube_dl/extractor/extractors.py View File

@ -598,6 +598,7 @@ from .nextmedia import (
NextMediaIE, NextMediaIE,
NextMediaActionNewsIE, NextMediaActionNewsIE,
AppleDailyIE, AppleDailyIE,
NextTVIE,
) )
from .nfb import NFBIE from .nfb import NFBIE
from .nfl import NFLIE from .nfl import NFLIE


+ 53
- 1
youtube_dl/extractor/nextmedia.py View File

@ -3,7 +3,14 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urlparse from ..compat import compat_urlparse
from ..utils import parse_iso8601
from ..utils import (
clean_html,
get_element_by_class,
int_or_none,
parse_iso8601,
remove_start,
unified_timestamp,
)
class NextMediaIE(InfoExtractor): class NextMediaIE(InfoExtractor):
@ -184,3 +191,48 @@ class AppleDailyIE(NextMediaIE):
def _fetch_description(self, page): def _fetch_description(self, page):
return self._html_search_meta('description', page, 'news description') return self._html_search_meta('description', page, 'news description')
class NextTVIE(InfoExtractor):
    """Information extractor for video pages hosted on nexttv.com.tw."""

    IE_DESC = '壹電視'
    # The video id is the trailing run of digits after one or more path
    # segments.
    _VALID_URL = r'https?://(?:www\.)?nexttv\.com\.tw/(?:[^/]+/)+(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.nexttv.com.tw/news/realtime/politics/11779671',
        'info_dict': {
            'id': '11779671',
            'ext': 'mp4',
            'title': '「超收稅」近4千億! 藍議員籲發消費券',
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1484825400,
            'upload_date': '20170119',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict for its video.

        The title is taken from the first ``<h1>`` element, the media URL and
        thumbnail from the page's hidden ``<input>`` fields, and the timestamp
        and view count from the elements with class ``date`` and ``click``.

        A missing ``ntt-vod-src-detailview`` hidden input surfaces as a
        ``KeyError``; the thumbnail is optional and may be ``None``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Title lookup comes first, ahead of the hidden-input access below.
        title = self._html_search_regex(
            r'<h1[^>]*>([^<]+)</h1>', webpage, 'title')

        hidden_inputs = self._hidden_inputs(webpage)
        # Mandatory field: deliberately a hard lookup rather than .get().
        video_url = hidden_inputs['ntt-vod-src-detailview']

        # '+0800' is appended before parsing -- presumably because the page
        # prints the date without a UTC offset (TODO confirm).
        timestamp = None
        published = get_element_by_class('date', webpage)
        if published:
            timestamp = unified_timestamp(published + '+0800')

        # Strip the '點閱:' label prefix so only the number is left to parse.
        click_text = clean_html(get_element_by_class('click', webpage))
        view_count = int_or_none(remove_start(click_text, '點閱:'))

        info = {
            'id': video_id,
            'title': title,
            'url': video_url,
            'thumbnail': hidden_inputs.get('ntt-vod-img-src'),
            'timestamp': timestamp,
            'view_count': view_count,
        }
        return info

Loading…
Cancel
Save