Browse Source

[tv2:article] Add extractor (Closes #5724)

totalwebcasting
Sergey M․ 10 years ago
parent
commit
588b82bbf8
2 changed files with 37 additions and 1 deletions
  1. +4
    -1
      youtube_dl/extractor/__init__.py
  2. +33
    -0
      youtube_dl/extractor/tv2.py

+ 4
- 1
youtube_dl/extractor/__init__.py View File

@ -572,7 +572,10 @@ from .tumblr import TumblrIE
from .tunein import TuneInIE from .tunein import TuneInIE
from .turbo import TurboIE from .turbo import TurboIE
from .tutv import TutvIE from .tutv import TutvIE
from .tv2 import TV2IE
from .tv2 import (
TV2IE,
TV2ArticleIE,
)
from .tv4 import TV4IE from .tv4 import TV4IE
from .tvigle import TvigleIE from .tvigle import TvigleIE
from .tvp import TvpIE, TvpSeriesIE from .tvp import TvpIE, TvpSeriesIE


+ 33
- 0
youtube_dl/extractor/tv2.py View File

@ -1,12 +1,15 @@
# encoding: utf-8 # encoding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
int_or_none, int_or_none,
float_or_none, float_or_none,
parse_iso8601, parse_iso8601,
remove_end,
) )
@ -91,3 +94,33 @@ class TV2IE(InfoExtractor):
'categories': categories, 'categories': categories,
'formats': formats, 'formats': formats,
} }
class TV2ArticleIE(InfoExtractor):
    """Extractor for tv2.no article pages.

    The article is returned as a playlist: every ``data-assetid`` attribute
    found in the page markup becomes one entry that is delegated to the
    ``TV2`` extractor through a ``http://www.tv2.no/v/<id>`` URL.
    """

    # Raw string: the pattern contains regex escapes (backslash-dot,
    # backslash-d) that are not valid Python string escapes and would emit
    # a warning in a plain literal. Both http and https URLs are accepted.
    _VALID_URL = r'https?://(?:www\.)?tv2\.no/(?:a|\d{4}/\d{2}/\d{2}(/[^/]+)+)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.tv2.no/2015/05/16/nyheter/alesund/krim/pingvin/6930542',
        'info_dict': {
            'id': '6930542',
            'title': 'Russen hetses etter pingvintyveri – innrømmer å ha åpnet luken på buret',
            'description': 'md5:339573779d3eea3542ffe12006190954',
        },
        'playlist_count': 2,
    }, {
        'url': 'http://www.tv2.no/a/6930542',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist result for the article at *url*.

        The numeric id captured by ``_VALID_URL`` is used as the playlist id.
        Title and description come from the page's Open Graph metadata with a
        trailing ' - TV2.no' suffix removed.
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        # One playlist entry per asset id, in the order they appear in the page.
        entries = [
            self.url_result('http://www.tv2.no/v/%s' % video_id, 'TV2')
            for video_id in re.findall(r'data-assetid="(\d+)"', webpage)]

        title = remove_end(self._og_search_title(webpage), ' - TV2.no')
        description = remove_end(self._og_search_description(webpage), ' - TV2.no')

        return self.playlist_result(entries, playlist_id, title, description)

Loading…
Cancel
Save