Browse Source

[canal13cl] fix info extraction

totalwebcasting
remitamine 9 years ago
parent
commit
8b55cadc83
3 changed files with 78 additions and 49 deletions
  1. +1
    -1
      youtube_dl/extractor/__init__.py
  2. +0
    -48
      youtube_dl/extractor/canal13cl.py
  3. +77
    -0
      youtube_dl/extractor/tele13.py

+ 1
- 1
youtube_dl/extractor/__init__.py View File

@ -67,7 +67,6 @@ from .camdemy import (
CamdemyIE, CamdemyIE,
CamdemyFolderIE CamdemyFolderIE
) )
from .canal13cl import Canal13clIE
from .canalplus import CanalplusIE from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE from .canalc2 import Canalc2IE
from .cbs import CBSIE from .cbs import CBSIE
@ -612,6 +611,7 @@ from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE from .teamcoco import TeamcocoIE
from .techtalks import TechTalksIE from .techtalks import TechTalksIE
from .ted import TEDIE from .ted import TEDIE
from .tele13 import Tele13IE
from .telebruxelles import TeleBruxellesIE from .telebruxelles import TeleBruxellesIE
from .telecinco import TelecincoIE from .telecinco import TelecincoIE
from .telegraaf import TelegraafIE from .telegraaf import TelegraafIE


+ 0
- 48
youtube_dl/extractor/canal13cl.py View File

@ -1,48 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class Canal13clIE(InfoExtractor):
    """Extractor for article pages on 13.cl that embed a single video.

    The page exposes the media URL and the thumbnail through the inline
    JavaScript assignments ``articuloVideo = "..."`` and
    ``articuloImagen = "..."``; title and description come from the
    ``twitter:*`` meta tags.
    """

    _VALID_URL = r'^http://(?:www\.)?13\.cl/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://www.13.cl/t13/nacional/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
        'md5': '4cb1fa38adcad8fea88487a078831755',
        'info_dict': {
            'id': '1403022125',
            'display_id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
            'ext': 'mp4',
            'title': 'El "círculo de hierro" de Michelle Bachelet en su regreso a La Moneda',
            'description': '(Foto: Agencia Uno) En nueve días más, Michelle Bachelet va a asumir por segunda vez como presidenta de la República. Entre aquellos que la acompañarán hay caras que se repiten y otras que se consolidan en su entorno de colaboradores más cercanos.',
        }
    }

    def _real_extract(self, url):
        """Download the article page at *url* and build the info dict.

        Returns a dict with ``id``, ``display_id``, ``url``, ``title``,
        ``description``, ``ext`` and ``thumbnail``.  A missing title or
        media URL is fatal; description and thumbnail are optional.
        """
        # The last path component of the page URL serves as the display id.
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        title = self._html_search_meta(
            'twitter:title', webpage, 'title', fatal=True)
        description = self._html_search_meta(
            'twitter:description', webpage, 'description')

        # Kept in its own name so the page URL parameter is not shadowed.
        video_url = self._html_search_regex(
            r'articuloVideo = \"(.*?)\"', webpage, 'url')

        # Use a run of 7+ digits inside the media URL as the id; fall back
        # to the display id when the media URL carries no such number.
        real_id = self._search_regex(
            r'[^0-9]([0-9]{7,})[^0-9]', video_url, 'id', default=display_id)

        # The thumbnail is optional metadata: its absence must not abort
        # an otherwise successful extraction.
        thumbnail = self._html_search_regex(
            r'articuloImagen = \"(.*?)\"', webpage, 'thumbnail', fatal=False)

        return {
            'id': real_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'description': description,
            'ext': 'mp4',
            'thumbnail': thumbnail,
        }

+ 77
- 0
youtube_dl/extractor/tele13.py View File

@ -0,0 +1,77 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import js_to_json
class Tele13IE(InfoExtractor):
    """Extractor for video pages under ``t13.cl/videos``.

    The page configures its player with an inline
    ``jwplayer('player-vivo').setup({...})`` call; the object literal passed
    to ``setup`` supplies the title, the thumbnail and the list of sources.
    A source pointing at youtube.com is delegated to the Youtube extractor.
    """

    # Both http and https page URLs are accepted.
    _VALID_URL = r'^https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
    _TESTS = [
        {
            'url': 'http://www.t13.cl/videos/actualidad/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
            'md5': '4cb1fa38adcad8fea88487a078831755',
            'info_dict': {
                'id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
                'ext': 'mp4',
                'title': 'El c\u00edrculo de hierro de Michelle Bachelet en su regreso a La Moneda',
            }
        },
        {
            'url': 'http://www.t13.cl/videos/mundo/tendencias/video-captan-misteriosa-bola-fuego-cielos-bangkok',
            'md5': '65d1ae54812c96f4b345dd21d3bb1adc',
            'info_dict': {
                'id': 'rOoKv2OMpOw',
                'ext': 'mp4',
                'title': 'Shooting star seen on 7-Sep-2015',
                'description': 'md5:a1cd2e74f6ee6851552c9cf5851d6b06',
                'uploader': 'Porjai Jaturongkhakun',
                'upload_date': '20150906',
                'uploader_id': 'UCnLY_3ezwNcDSC_Wc6suZxw',
            },
            'add_ie': ['Youtube'],
        }
    ]

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict.

        Returns either a ``url_result`` for the Youtube extractor (when a
        source URL contains ``youtube.com``) or a dict with ``id``,
        ``title``, ``description``, ``thumbnail`` and ``formats``.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The dot before ``setup`` is escaped so only the literal method
        # call matches; DOTALL lets the object literal span several lines.
        setup_code = self._search_regex(
            r"jwplayer\('player-vivo'\)\.setup\((\{.*?\})\)",
            webpage,
            'setup code',
            flags=re.DOTALL
        )
        # Every newline followed by ``//`` is removed before conversion.
        # NOTE(review): presumably this works around ``//``-prefixed lines
        # inside the setup object that would break JSON parsing -- confirm.
        setup_js = self._parse_json(
            js_to_json(setup_code.replace('\n//', '')),
            display_id
        )

        title = setup_js['title']
        # Prefer the top-level image, else the first playlist item's image.
        thumbnail = setup_js.get('image') or setup_js['playlist'][0].get('image')
        description = self._html_search_meta(
            'description', webpage, 'description')

        formats = []
        for source in setup_js['playlist'][0]['sources']:
            format_url = source.get('file')
            # Skip entries whose ``file`` is missing, null or empty instead
            # of failing on them.
            if not format_url:
                continue
            if '.m3u8' in format_url:
                formats.extend(
                    self._extract_m3u8_formats(format_url, display_id))
            elif 'youtube.com' in format_url:
                # Embedded YouTube video: hand over to the Youtube extractor.
                return self.url_result(format_url, 'Youtube')
            else:
                formats.append({
                    'url': format_url,
                    'format_id': source.get('label'),
                })

        return {
            'id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
        }

Loading…
Cancel
Save