@ -1,14 +1,15 @@
from __future__ import unicode_literals
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .common import InfoExtractor
from ..compat import (
from ..compat import (
compat_HTTPError ,
compat_urllib_parse ,
compat_urllib_parse ,
compat_urllib_request ,
compat_urllib_request ,
compat_urlparse ,
)
)
from ..utils import (
from ..utils import (
ExtractorError ,
ExtractorError ,
float_or_none ,
int_or_none ,
int_or_none ,
sanitized_Request ,
sanitized_Request ,
)
)
@ -19,6 +20,8 @@ class UdemyIE(InfoExtractor):
_VALID_URL = r ' https?://www \ .udemy \ .com/(?:[^#]+#/lecture/|lecture/view/? \ ?lectureId=)(?P<id> \ d+) '
_VALID_URL = r ' https?://www \ .udemy \ .com/(?:[^#]+#/lecture/|lecture/view/? \ ?lectureId=)(?P<id> \ d+) '
_LOGIN_URL = ' https://www.udemy.com/join/login-popup/?displayType=ajax&showSkipButton=1 '
_LOGIN_URL = ' https://www.udemy.com/join/login-popup/?displayType=ajax&showSkipButton=1 '
_ORIGIN_URL = ' https://www.udemy.com '
_ORIGIN_URL = ' https://www.udemy.com '
_SUCCESSFULLY_ENROLLED = ' >You have enrolled in this course!< '
_ALREADY_ENROLLED = ' >You are already taking this course.< '
_NETRC_MACHINE = ' udemy '
_NETRC_MACHINE = ' udemy '
_TESTS = [ {
_TESTS = [ {
@ -34,6 +37,29 @@ class UdemyIE(InfoExtractor):
' skip ' : ' Requires udemy account credentials ' ,
' skip ' : ' Requires udemy account credentials ' ,
} ]
} ]
def _enroll_course(self, webpage, course_id):
    """Request enrolment in a course and report the outcome.

    The subscribe link is taken from ``webpage`` when one is present;
    otherwise the generic subscribe URL is built from ``course_id``.
    The enrolment page is then downloaded and checked for the
    ``_SUCCESSFULLY_ENROLLED`` and ``_ALREADY_ENROLLED`` markers, and a
    status line is printed for whichever marker is found. Nothing is
    printed when neither marker is present.

    :param webpage: HTML in which to look for the subscribe link.
    :param course_id: course identifier; used for the fallback URL and
        as the label of the progress/status messages.
    :returns: None.
    """
    enroll_url = self._search_regex(
        # \1 closes the href with the same quote character that opened it.
        r'href=(["\'])(?P<url>https?://(?:www\.)?udemy\.com/course/subscribe/.+?)\1',
        webpage, 'enroll url', group='url',
        default='https://www.udemy.com/course/subscribe/?courseId=%s' % course_id)
    enroll_page = self._download_webpage(
        enroll_url, course_id, 'Enrolling in the course')
    if self._SUCCESSFULLY_ENROLLED in enroll_page:
        self.to_screen('%s: Successfully enrolled in' % course_id)
    elif self._ALREADY_ENROLLED in enroll_page:
        self.to_screen('%s: Already enrolled in' % course_id)
def _download_lecture(self, course_id, lecture_id):
    """Download the API 2.0 JSON for one lecture of a subscribed course.

    :param course_id: identifier of the course the lecture belongs to.
    :param lecture_id: identifier of the lecture; also used as the
        label of the progress message.
    :returns: whatever ``self._download_json`` returns for the lecture
        endpoint (the caller reads ``title``, ``description`` and
        ``asset`` from it).
    :raises: any error raised by ``self._download_json``. The caller in
        ``_real_extract`` relies on this: it inspects an
        ``ExtractorError`` whose cause is an HTTP 403 to decide whether
        to enrol in the course and retry.
    """
    query = compat_urllib_parse.urlencode({
        'video_only': '',
        'auto_play': '',
        'fields[lecture]': 'title,description,asset',
        'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,data',
        'instructorPreviewMode': 'False',
    })
    lecture_url = (
        'https://www.udemy.com/api-2.0/users/me/subscribed-courses/%s/lectures/%s?%s'
        % (course_id, lecture_id, query))
    # Deliberately not passing fatal=False: the caller expects a failed
    # request to raise (so that a 403 can trigger enrolment) and indexes
    # the result unconditionally.
    # NOTE(review): assumes the base-class downloader raises
    # ExtractorError by default and only suppresses it when fatal=False
    # -- confirm against InfoExtractor._download_json.
    return self._download_json(
        lecture_url, lecture_id, 'Downloading lecture JSON')
def _handle_error ( self , response ) :
def _handle_error ( self , response ) :
if not isinstance ( response , dict ) :
if not isinstance ( response , dict ) :
return
return
@ -45,7 +71,7 @@ class UdemyIE(InfoExtractor):
error_str + = ' - %s ' % error_data . get ( ' formErrors ' )
error_str + = ' - %s ' % error_data . get ( ' formErrors ' )
raise ExtractorError ( error_str , expected = True )
raise ExtractorError ( error_str , expected = True )
def _download_json ( self , url_or_request , video_id , note = ' Downloading JSON metadata ' ) :
def _download_json ( self , url_or_request , video_id , note = ' Downloading JSON metadata ' , * args , * * kwargs ) :
headers = {
headers = {
' X-Udemy-Snail-Case ' : ' true ' ,
' X-Udemy-Snail-Case ' : ' true ' ,
' X-Requested-With ' : ' XMLHttpRequest ' ,
' X-Requested-With ' : ' XMLHttpRequest ' ,
@ -55,6 +81,7 @@ class UdemyIE(InfoExtractor):
headers [ ' X-Udemy-Client-Id ' ] = cookie . value
headers [ ' X-Udemy-Client-Id ' ] = cookie . value
elif cookie . name == ' access_token ' :
elif cookie . name == ' access_token ' :
headers [ ' X-Udemy-Bearer-Token ' ] = cookie . value
headers [ ' X-Udemy-Bearer-Token ' ] = cookie . value
headers [ ' X-Udemy-Authorization ' ] = ' Bearer %s ' % cookie . value
if isinstance ( url_or_request , compat_urllib_request . Request ) :
if isinstance ( url_or_request , compat_urllib_request . Request ) :
for header , value in headers . items ( ) :
for header , value in headers . items ( ) :
@ -62,7 +89,7 @@ class UdemyIE(InfoExtractor):
else :
else :
url_or_request = sanitized_Request ( url_or_request , headers = headers )
url_or_request = sanitized_Request ( url_or_request , headers = headers )
response = super ( UdemyIE , self ) . _download_json ( url_or_request , video_id , note )
response = super ( UdemyIE , self ) . _download_json ( url_or_request , video_id , note , * args , * * kwargs )
self . _handle_error ( response )
self . _handle_error ( response )
return response
return response
@ -110,66 +137,77 @@ class UdemyIE(InfoExtractor):
def _real_extract ( self , url ) :
def _real_extract ( self , url ) :
lecture_id = self . _match_id ( url )
lecture_id = self . _match_id ( url )
lecture = self . _download_json (
' https://www.udemy.com/api-1.1/lectures/ %s ' % lecture_id ,
lecture_id , ' Downloading lecture JSON ' )
webpage = self . _download_webpage ( url , lecture_id )
course_id = self . _search_regex (
r ' data-course-id=[ " \' ]( \ d+) ' , webpage , ' course id ' )
asset_type = lecture . get ( ' assetType ' ) or lecture . get ( ' asset_type ' )
try :
lecture = self . _download_lecture ( course_id , lecture_id )
except ExtractorError as e :
# Error could possibly mean we are not enrolled in the course
if isinstance ( e . cause , compat_HTTPError ) and e . cause . code == 403 :
self . _enroll_course ( webpage , course_id )
lecture_id = self . _download_lecture ( course_id , lecture_id )
else :
raise
title = lecture [ ' title ' ]
description = lecture . get ( ' description ' )
asset = lecture [ ' asset ' ]
asset_type = asset . get ( ' assetType ' ) or asset . get ( ' asset_type ' )
if asset_type != ' Video ' :
if asset_type != ' Video ' :
raise ExtractorError (
raise ExtractorError (
' Lecture %s is not a video ' % lecture_id , expected = True )
' Lecture %s is not a video ' % lecture_id , expected = True )
asset = lecture [ ' asset ' ]
stream_url = asset . get ( ' streamUrl ' ) or asset . get ( ' stream_url ' )
stream_url = asset . get ( ' streamUrl ' ) or asset . get ( ' stream_url ' )
mobj = re . search ( r ' (https?://www \ .youtube \ .com/watch \ ?v=.*) ' , stream_url )
if mobj :
return self . url_result ( mobj . group ( 1 ) , ' Youtube ' )
if stream_url :
youtube_url = self . _search_regex (
r ' (https?://www \ .youtube \ .com/watch \ ?v=.*) ' , stream_url , ' youtube URL ' , default = None )
if youtube_url :
return self . url_result ( youtube_url , ' Youtube ' )
video_id = asset [ ' id ' ]
video_id = asset [ ' id ' ]
thumbnail = asset . get ( ' thumbnailUrl ' ) or asset . get ( ' thumbnail_url ' )
thumbnail = asset . get ( ' thumbnailUrl ' ) or asset . get ( ' thumbnail_url ' )
duration = int_or_none ( asset . get ( ' data ' , { } ) . get ( ' duration ' ) )
download_url = asset . get ( ' downloadUrl ' ) or asset . get ( ' download_url ' )
video = download_url . get ( ' Video ' ) or download_url . get ( ' video ' )
video_480p = download_url . get ( ' Video480p ' ) or download_url . get ( ' video_480p ' )
formats = [ {
' url ' : video_480p [ 0 ] ,
' format_id ' : ' download-360p ' ,
} , {
' url ' : video [ 0 ] ,
' format_id ' : ' download-720p ' ,
} ]
# Some videos also contain formats in asset['data']['outputs'] (e.g.
# https://www.udemy.com/ios9-swift/learn/#/lecture/3383208)
outputs = asset . get ( ' data ' , { } ) . get ( ' outputs ' )
if isinstance ( outputs , dict ) :
for format_id , f in outputs . items ( ) :
video_url = f . get ( ' url ' )
if video_url :
formats . append ( {
' url ' : video_url ,
' format_id ' : ' %s p ' % ( f . get ( ' labe1l ' ) or format_id ) ,
' width ' : int_or_none ( f . get ( ' width ' ) ) ,
' height ' : int_or_none ( f . get ( ' height ' ) ) ,
' vbr ' : int_or_none ( f . get ( ' video_bitrate_in_kbps ' ) ) ,
' vcodec ' : f . get ( ' video_codec ' ) ,
' fps ' : int_or_none ( f . get ( ' frame_rate ' ) ) ,
' abr ' : int_or_none ( f . get ( ' audio_bitrate_in_kbps ' ) ) ,
' acodec ' : f . get ( ' audio_codec ' ) ,
' asr ' : int_or_none ( f . get ( ' audio_sample_rate ' ) ) ,
' tbr ' : int_or_none ( f . get ( ' total_bitrate_in_kbps ' ) ) ,
' filesize ' : int_or_none ( f . get ( ' file_size_in_bytes ' ) ) ,
duration = float_or_none ( asset . get ( ' data ' , { } ) . get ( ' duration ' ) )
outputs = asset . get ( ' data ' , { } ) . get ( ' outputs ' , { } )
formats = [ ]
for format_ in asset . get ( ' download_urls ' , { } ) . get ( ' Video ' , [ ] ) :
video_url = format_ . get ( ' file ' )
if not video_url :
continue
format_id = format_ . get ( ' label ' )
f = {
' url ' : format_ [ ' file ' ] ,
' height ' : int_or_none ( format_id ) ,
}
if format_id :
# Some videos contain additional metadata (e.g.
# https://www.udemy.com/ios9-swift/learn/#/lecture/3383208)
output = outputs . get ( format_id )
if isinstance ( output , dict ) :
f . update ( {
' format_id ' : ' %s p ' % ( output . get ( ' label ' ) or format_id ) ,
' width ' : int_or_none ( output . get ( ' width ' ) ) ,
' height ' : int_or_none ( output . get ( ' height ' ) ) ,
' vbr ' : int_or_none ( output . get ( ' video_bitrate_in_kbps ' ) ) ,
' vcodec ' : output . get ( ' video_codec ' ) ,
' fps ' : int_or_none ( output . get ( ' frame_rate ' ) ) ,
' abr ' : int_or_none ( output . get ( ' audio_bitrate_in_kbps ' ) ) ,
' acodec ' : output . get ( ' audio_codec ' ) ,
' asr ' : int_or_none ( output . get ( ' audio_sample_rate ' ) ) ,
' tbr ' : int_or_none ( output . get ( ' total_bitrate_in_kbps ' ) ) ,
' filesize ' : int_or_none ( output . get ( ' file_size_in_bytes ' ) ) ,
} )
} )
else :
f [ ' format_id ' ] = ' %s p ' % format_id
formats . append ( f )
self . _sort_formats ( formats )
self . _sort_formats ( formats )
title = lecture [ ' title ' ]
description = lecture . get ( ' description ' )
return {
return {
' id ' : video_id ,
' id ' : video_id ,
' title ' : title ,
' title ' : title ,
@ -182,9 +220,7 @@ class UdemyIE(InfoExtractor):
class UdemyCourseIE ( UdemyIE ) :
class UdemyCourseIE ( UdemyIE ) :
IE_NAME = ' udemy:course '
IE_NAME = ' udemy:course '
_VALID_URL = r ' https?://www \ .udemy \ .com/(?P<coursepath>[ \ da-z-]+) '
_SUCCESSFULLY_ENROLLED = ' >You have enrolled in this course!< '
_ALREADY_ENROLLED = ' >You are already taking this course.< '
_VALID_URL = r ' https?://www \ .udemy \ .com/(?P<id>[ \ da-z-]+) '
_TESTS = [ ]
_TESTS = [ ]
@classmethod
@classmethod
@ -192,24 +228,18 @@ class UdemyCourseIE(UdemyIE):
return False if UdemyIE . suitable ( url ) else super ( UdemyCourseIE , cls ) . suitable ( url )
return False if UdemyIE . suitable ( url ) else super ( UdemyCourseIE , cls ) . suitable ( url )
def _real_extract ( self , url ) :
def _real_extract ( self , url ) :
mobj = re . match ( self . _VALID_URL , url )
course_path = mobj . group ( ' coursepath ' )
course_path = self . _match_id ( url )
webpage = self . _download_webpage ( url , course_path )
response = self . _download_json (
response = self . _download_json (
' https://www.udemy.com/api-1.1/courses/ %s ' % course_path ,
' https://www.udemy.com/api-1.1/courses/ %s ' % course_path ,
course_path , ' Downloading course JSON ' )
course_path , ' Downloading course JSON ' )
course_id = int ( response [ ' id ' ] )
course_title = response [ ' title ' ]
course_id = response [ ' id ' ]
course_title = response . get ( ' title ' )
webpage = self . _download_webpage (
' https://www.udemy.com/course/subscribe/?courseId= %s ' % course_id ,
course_id , ' Enrolling in the course ' )
if self . _SUCCESSFULLY_ENROLLED in webpage :
self . to_screen ( ' %s : Successfully enrolled in ' % course_id )
elif self . _ALREADY_ENROLLED in webpage :
self . to_screen ( ' %s : Already enrolled in ' % course_id )
self . _enroll_course ( webpage , course_id )
response = self . _download_json (
response = self . _download_json (
' https://www.udemy.com/api-1.1/courses/ %s /curriculum ' % course_id ,
' https://www.udemy.com/api-1.1/courses/ %s /curriculum ' % course_id ,