|
|
@ -1,11 +1,13 @@ |
|
|
|
# coding: utf-8 |
|
|
|
from __future__ import unicode_literals |
|
|
|
|
|
|
|
import re |
|
|
|
|
|
|
|
from .common import InfoExtractor |
|
|
|
|
|
|
|
|
|
|
|
class TeleBruxellesIE(InfoExtractor): |
|
|
|
_VALID_URL = r'https?://(?:www\.)?telebruxelles\.be/(news|sport|dernier-jt)/?(?P<id>[^/#?]+)' |
|
|
|
_VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(news|sport|dernier-jt)/?(?P<id>[^/#?]+)' |
|
|
|
_TESTS = [{ |
|
|
|
'url': 'http://www.telebruxelles.be/news/auditions-devant-parlement-francken-galant-tres-attendus/', |
|
|
|
'md5': '59439e568c9ee42fb77588b2096b214f', |
|
|
@ -39,18 +41,18 @@ class TeleBruxellesIE(InfoExtractor): |
|
|
|
webpage = self._download_webpage(url, display_id) |
|
|
|
|
|
|
|
article_id = self._html_search_regex( |
|
|
|
r"<article id=\"post-(\d+)\"", webpage, 'article ID') |
|
|
|
r"<article id=\"post-(\d+)\"", webpage, 'article ID', default=None) |
|
|
|
title = self._html_search_regex( |
|
|
|
r'<h1 class=\"entry-title\">(.*?)</h1>', webpage, 'title') |
|
|
|
description = self._og_search_description(webpage) |
|
|
|
description = self._og_search_description(webpage, default=None) |
|
|
|
|
|
|
|
rtmp_url = self._html_search_regex( |
|
|
|
r"file: \"(rtmp://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}/vod/mp4:\" \+ \"\w+\" \+ \".mp4)\"", |
|
|
|
r'file\s*:\s*"(rtmp://[^/]+/vod/mp4:"\s*\+\s*"[^"]+"\s*\+\s*".mp4)"', |
|
|
|
webpage, 'RTMP url') |
|
|
|
rtmp_url = rtmp_url.replace("\" + \"", "") |
|
|
|
rtmp_url = re.sub(r'"\s*\+\s*"', '', rtmp_url) |
|
|
|
|
|
|
|
return { |
|
|
|
'id': article_id, |
|
|
|
'id': article_id or display_id, |
|
|
|
'display_id': display_id, |
|
|
|
'title': title, |
|
|
|
'description': description, |
|
|
|