@ -1,20 +1,25 @@
from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
    unescapeHTML,
)
class CSpanIE(InfoExtractor):
    """Extractor for program pages on www.c-spanvideo.org.

    The program page supplies the numeric program id, the title and the
    Open Graph description/thumbnail; the direct media URL is read from
    the JSON returned by the site's ``ajax-player.php`` endpoint.
    """

    # Group 1 captures the program slug that follows ``/program/``.
    _VALID_URL = r'http://www\.c-spanvideo\.org/program/(.*)'
    IE_DESC = 'C-SPAN'
    _TEST = {
        'url': 'http://www.c-spanvideo.org/program/HolderonV',
        'file': '315139.mp4',
        'md5': '8e44ce11f0f725527daccc453f553eb0',
        'info_dict': {
            'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
            'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in [Shelby County v. Holder] in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the program page at *url*.

        The returned dict carries ``id``, ``title``, ``url``,
        ``description`` and ``thumbnail``.
        """
        # NOTE(review): the line assigning ``mobj`` was elided between the
        # two patch hunks; restored from the ``mobj.group(1)`` usage below
        # and the ``_VALID_URL`` pattern -- confirm against the real file.
        mobj = re.match(self._VALID_URL, url)
        prog_name = mobj.group(1)

        webpage = self._download_webpage(url, prog_name)
        # The numeric id appears in the page as a ``programid=...&`` query
        # parameter.
        video_id = self._search_regex(
            r'programid=(.*?)&', webpage, 'video id')

        title = self._html_search_regex(
            r'<!-- title -->\n\s*<h1[^>]*>(.*?)</h1>', webpage, 'title')
        description = self._og_search_description(webpage)

        # Presumably ``os=android&html5=program`` makes the player endpoint
        # answer with directly downloadable files -- TODO confirm.
        info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
        data_json = self._download_webpage(
            info_url, video_id, 'Downloading video info')
        data = json.loads(data_json)

        # The path of the first listed file is HTML-escaped in the payload.
        url = unescapeHTML(data['video']['files'][0]['path']['#text'])

        return {
            'id': video_id,
            'title': title,
            'url': url,
            'description': description,
            'thumbnail': self._og_search_thumbnail(webpage),
        }