@ -4,6 +4,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
get_meta_content ,
int_or_none ,
parse_iso8601 ,
)
@ -28,20 +29,26 @@ class HeiseIE(InfoExtractor):
' timestamp ' : 1411812600 ,
' upload_date ' : ' 20140927 ' ,
' description ' : ' In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten. ' ,
' thumbnail ' : ' re:https?://.* \ .jpg$ ' ,
}
}
def _real_extract ( self , url ) :
video_id = self . _match_id ( url )
webpage = self . _download_webpage ( url , video_id )
json_url = self . _search_regex (
r ' json_url: \ s* " ([^ " ]+) " ' , webpage , ' json URL ' )
config = self . _download_json ( json_url , video_id )
container_id = self . _search_regex (
r ' <div class= " videoplayerjw " .*?data-container= " ([0-9]+) " ' ,
webpage , ' container ID ' )
sequenz_id = self . _search_regex (
r ' <div class= " videoplayerjw " .*?data-sequenz= " ([0-9]+) " ' ,
webpage , ' sequenz ID ' )
data_url = ' http://www.heise.de/videout/feed?container= %s &sequenz= %s ' % ( container_id , sequenz_id )
doc = self . _download_xml ( data_url , video_id )
info = {
' id ' : video_id ,
' thumbnail ' : config . get ( ' poster ' ) ,
' thumbnail ' : self . _og_search_thumbnail ( webpage ) ,
' timestamp ' : parse_iso8601 ( get_meta_content ( ' date ' , webpage ) ) ,
' description ' : self . _og_search_description ( webpage ) ,
}
@ -49,32 +56,19 @@ class HeiseIE(InfoExtractor):
title = get_meta_content ( ' fulltitle ' , webpage )
if title :
info [ ' title ' ] = title
elif config . get ( ' title ' ) :
info [ ' title ' ] = config [ ' title ' ]
else :
info [ ' title ' ] = self . _og_search_title ( webpage )
formats = [ ]
for t , rs in config [ ' formats ' ] . items ( ) :
if not rs or not hasattr ( rs , ' items ' ) :
self . _downloader . report_warning (
' formats: {0}: no resolutions ' . format ( t ) )
continue
for height_str , obj in rs . items ( ) :
format_id = ' {0}_{1} ' . format ( t , height_str )
if not obj or not obj . get ( ' url ' ) :
self . _downloader . report_warning (
' formats: {0}: no url ' . format ( format_id ) )
continue
formats . append ( {
' url ' : obj [ ' url ' ] ,
' format_id ' : format_id ,
' height ' : self . _int ( height_str , ' height ' ) ,
} )
for source_node in doc . findall ( ' .//{http://rss.jwpcdn.com/}source ' ) :
label = source_node . attrib [ ' label ' ]
height = int_or_none ( self . _search_regex (
r ' ^(.*?_)?([0-9]+)p$ ' , label , ' height ' , default = None ) )
formats . append ( {
' url ' : source_node . attrib [ ' file ' ] ,
' format_note ' : label ,
' height ' : height ,
} )
self . _sort_formats ( formats )
info [ ' formats ' ] = formats