@ -2,12 +2,11 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import remove_end
class SBSIE ( InfoExtractor ) :
IE_DESC = 'sbs.com.au'
# A single pattern covering both the /ondemand/ and /news/ video pages, with an
# optional "single/" path segment; the numeric video id is captured as "id".
# (The earlier ondemand-only assignment was immediately overwritten by this
# one, so it has been dropped.)
_VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand|news)/video/(?:single/)?(?P<id>[0-9]+)'
_TESTS = [ {
# Original URL is handled by the generic IE which finds the iframe:
@ -17,43 +16,36 @@ class SBSIE(InfoExtractor):
' info_dict ' : {
' id ' : ' 320403011771 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Dingo Conservation ' ,
' description ' : ' Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction ' ,
' title ' : ' Dingo Conservation (The Feed) ' ,
' description ' : ' md5:f250a9856fca50d22dec0b5b8015f8a5 ' ,
' thumbnail ' : ' re:http://.* \ .jpg ' ,
' duration ' : 308 ,
} ,
' add_ies ' : [ ' generic ' ] ,
} , {
' url ' : ' http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed ' ,
' only_matching ' : True ,
} , {
' url ' : ' http://www.sbs.com.au/news/video/471395907773/The-Feed-July-9 ' ,
' only_matching ' : True ,
} ]
def _real_extract(self, url):
    """Resolve an SBS video page to the media URL listed in its player parameters.

    The video id is taken from ``url``. The "single" player page for that id
    is downloaded once, and the ``playerParams`` JavaScript object found in
    it is parsed as JSON.

    Returns a ``url_transparent`` result dict with:

    * ``id`` -- the video id matched from ``url``;
    * ``url`` -- the first non-empty value among
      ``releaseUrls['progressive']``, ``releaseUrls['standard']``,
      ``releaseUrls['html']`` and ``relatedItemsURL``;
    * ``title``, ``description``, ``thumbnail`` -- read from the page's
      meta tags.

    Raises ``KeyError`` if ``playerParams`` has no ``releaseUrls`` entry, or
    if none of the release URLs is set and ``relatedItemsURL`` is missing.
    """
    video_id = self._match_id(url)

    # The video is embedded through the "single" page, so that page is the
    # one fetched regardless of which URL variant was passed in.
    webpage = self._download_webpage(
        'http://www.sbs.com.au/ondemand/video/single/%s?context=web' % video_id,
        video_id)

    player_params = self._parse_json(
        self._search_regex(
            r'(?s)var\s+playerParams\s*=\s*({.+?});', webpage, 'playerParams'),
        video_id)

    # Use .get() for every candidate so that a missing key falls through to
    # the next one instead of raising before the fallback is tried.
    urls = player_params['releaseUrls']
    theplatform_url = (
        urls.get('progressive') or urls.get('standard') or
        urls.get('html') or player_params['relatedItemsURL'])

    # NOTE(review): _TESTS lists a title that still carries the
    # " (The Feed)" suffix, while this strips it -- confirm which is wanted.
    title = remove_end(
        self._og_search_title(webpage, default=video_id, fatal=False),
        ' (The Feed)')
    description = self._html_search_meta('description', webpage)
    thumbnail = self._og_search_thumbnail(webpage)

    return {
        '_type': 'url_transparent',
        'id': video_id,
        'url': theplatform_url,
        'title': title,
        'description': description,
        'thumbnail': thumbnail,
    }