@ -15,12 +15,12 @@ class AirMozillaIE(InfoExtractor):
_VALID_URL = r ' https?://air \ .mozilla \ .org/(?P<id>[0-9a-z-]+)/? '
_VALID_URL = r ' https?://air \ .mozilla \ .org/(?P<id>[0-9a-z-]+)/? '
_TEST = {
_TEST = {
' url ' : ' https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/ ' ,
' url ' : ' https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/ ' ,
' md5 ' : ' 2e3e7486ba5d180e829d453875b9b8bf ' ,
' md5 ' : ' 8d02f53ee39cf006009180e21df1f3ba ' ,
' info_dict ' : {
' info_dict ' : {
' id ' : ' 6x4q2w ' ,
' id ' : ' 6x4q2w ' ,
' ext ' : ' mp4 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Privacy Lab - a meetup for privacy minded people in San Francisco ' ,
' title ' : ' Privacy Lab - a meetup for privacy minded people in San Francisco ' ,
' thumbnail ' : r ' re:https?://vid \ .ly/(?P<id>[0-9a-z-]+)/poster ' ,
' thumbnail ' : r ' re:https?://.*/poster \ .jpg ' ,
' description ' : ' Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem... ' ,
' description ' : ' Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem... ' ,
' timestamp ' : 1422487800 ,
' timestamp ' : 1422487800 ,
' upload_date ' : ' 20150128 ' ,
' upload_date ' : ' 20150128 ' ,
@ -34,21 +34,13 @@ class AirMozillaIE(InfoExtractor):
def _real_extract ( self , url ) :
def _real_extract ( self , url ) :
display_id = self . _match_id ( url )
display_id = self . _match_id ( url )
webpage = self . _download_webpage ( url , display_id )
webpage = self . _download_webpage ( url , display_id )
video_id = self . _html_search_regex ( r ' //vid.ly/(.*?)/embed ' , webpage , ' id ' )
video_id = self . _html_search_regex ( r ' //vid \ .ly/(.*?)/embed' , webpage , ' id ' )
embed_script = self . _download_webpage ( ' https://vid.ly/{0}/embed ' . format ( video_id ) , video_id )
embed_script = self . _download_webpage ( ' https://vid.ly/{0}/embed ' . format ( video_id ) , video_id )
jwconfig = self . _search_regex ( r ' \ svar jwconfig = ( \ {.*? \ }); \ s ' , embed_script , ' metadata ' )
metadata = self . _parse_json ( jwconfig , video_id )
formats = [ {
' url ' : source [ ' file ' ] ,
' ext ' : source [ ' type ' ] ,
' format_id ' : self . _search_regex ( r ' &format=(.*)$ ' , source [ ' file ' ] , ' video format ' ) ,
' format ' : source [ ' label ' ] ,
' height ' : int ( source [ ' label ' ] . rstrip ( ' p ' ) ) ,
} for source in metadata [ ' playlist ' ] [ 0 ] [ ' sources ' ] ]
self . _sort_formats ( formats )
jwconfig = self . _parse_json ( self . _search_regex (
r ' initCallback \ ((.*) \ ); ' , embed_script , ' metadata ' ) , video_id ) [ ' config ' ]
info_dict = self . _parse_jwplayer_data ( jwconfig , video_id )
view_count = int_or_none ( self . _html_search_regex (
view_count = int_or_none ( self . _html_search_regex (
r ' Views since archived: ([0-9]+) ' ,
r ' Views since archived: ([0-9]+) ' ,
webpage , ' view count ' , fatal = False ) )
webpage , ' view count ' , fatal = False ) )
@ -58,17 +50,17 @@ class AirMozillaIE(InfoExtractor):
r ' Duration: \ s*( \ d+ \ s*hours? \ s* \ d+ \ s*minutes?) ' ,
r ' Duration: \ s*( \ d+ \ s*hours? \ s* \ d+ \ s*minutes?) ' ,
webpage , ' duration ' , fatal = False ) )
webpage , ' duration ' , fatal = False ) )
return {
info_dict . update ( {
' id ' : video_id ,
' id ' : video_id ,
' title ' : self . _og_search_title ( webpage ) ,
' title ' : self . _og_search_title ( webpage ) ,
' formats ' : formats ,
' url ' : self . _og_search_url ( webpage ) ,
' url ' : self . _og_search_url ( webpage ) ,
' display_id ' : display_id ,
' display_id ' : display_id ,
' thumbnail ' : metadata [ ' playlist ' ] [ 0 ] . get ( ' image ' ) ,
' description ' : self . _og_search_description ( webpage ) ,
' description ' : self . _og_search_description ( webpage ) ,
' timestamp ' : timestamp ,
' timestamp ' : timestamp ,
' location ' : self . _html_search_regex ( r ' Location: (.*) ' , webpage , ' location ' , default = None ) ,
' location ' : self . _html_search_regex ( r ' Location: (.*) ' , webpage , ' location ' , default = None ) ,
' duration ' : duration ,
' duration ' : duration ,
' view_count ' : view_count ,
' view_count ' : view_count ,
' categories ' : re . findall ( r ' <a href= " .*? " class= " channel " >(.*?)</a> ' , webpage ) ,
' categories ' : re . findall ( r ' <a href= " .*? " class= " channel " >(.*?)</a> ' , webpage ) ,
}
} )
return info_dict