@ -12,7 +12,6 @@ from ..utils import (
get_element_by_class ,
int_or_none ,
orderedSet ,
remove_start ,
str_or_none ,
str_to_int ,
unescapeHTML ,
@ -21,6 +20,7 @@ from ..utils import (
urlencode_postdata ,
)
from .dailymotion import DailymotionIE
from .odnoklassniki import OdnoklassnikiIE
from .pladform import PladformIE
from .vimeo import VimeoIE
from .youtube import YoutubeIE
@ -60,6 +60,18 @@ class VKBaseIE(InfoExtractor):
def _real_initialize ( self ) :
self . _login ( )
def _download_payload ( self , path , video_id , data , fatal = True ) :
data [ ' al ' ] = 1
code , payload = self . _download_json (
' https://vk.com/ %s .php ' % path , video_id ,
data = urlencode_postdata ( data ) , fatal = fatal ,
headers = { ' X-Requested-With ' : ' XMLHttpRequest ' } ) [ ' payload ' ]
if code == ' 3 ' :
self . raise_login_required ( )
elif code == ' 8 ' :
raise ExtractorError ( clean_html ( payload [ 0 ] [ 1 : - 1 ] ) , expected = True )
return payload
class VKIE ( VKBaseIE ) :
IE_NAME = ' vk '
@ -96,7 +108,6 @@ class VKIE(VKBaseIE):
} ,
{
' url ' : ' http://vk.com/video205387401_165548505 ' ,
' md5 ' : ' 6c0aeb2e90396ba97035b9cbde548700 ' ,
' info_dict ' : {
' id ' : ' 205387401_165548505 ' ,
' ext ' : ' mp4 ' ,
@ -110,18 +121,18 @@ class VKIE(VKBaseIE):
} ,
{
' note ' : ' Embedded video ' ,
' url ' : ' http://vk.com/video_ext.php?oid=32194266&id=162925554&hash=7d8c2e0d5e05aeaa&hd=1 ' ,
' md5 ' : ' c7ce8f1f87bec05b3de07fdeafe21a0a ' ,
' url ' : ' https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa ' ,
' md5 ' : ' 7babad3b85ea2e91948005b1b8b0cb84 ' ,
' info_dict ' : {
' id ' : ' 32194266_162925554 ' ,
' id ' : ' -77521_162222515 ' ,
' ext ' : ' mp4 ' ,
' uploader ' : ' Vladimir Gavrin ' ,
' title ' : ' Lin Dan ' ,
' duration ' : 101 ,
' upload_date ' : ' 20120730 ' ,
' view_count ' : int ,
' uploader ' : ' re:(?:Noize MC|Alexander Ilyashenko).* ' ,
' title ' : ' ProtivoGunz - Хуёвая песня ' ,
' duration ' : 195 ,
' upload_date ' : ' 20120212 ' ,
' timestamp ' : 1329049880 ,
' uploader_id ' : ' -77521 ' ,
} ,
' skip ' : ' This video has been removed from public access. ' ,
} ,
{
# VIDEO NOW REMOVED
@ -138,18 +149,19 @@ class VKIE(VKBaseIE):
' upload_date ' : ' 20121218 ' ,
' view_count ' : int ,
} ,
' skip ' : ' Requires vk account credentials ' ,
' skip ' : ' Removed ' ,
} ,
{
' url ' : ' http://vk.com/hd_kino_mania?z=video-43215063_168067957 %2F 15c66b9b533119788d ' ,
' md5 ' : ' 4d7a5ef8cf114dfa09577e57b2993202 ' ,
' info_dict ' : {
' id ' : ' -43215063_168067957 ' ,
' ext ' : ' mp4 ' ,
' uploader ' : ' Киномания - лучшее из мира кино ' ,
' uploader ' : ' Bro Mazter ' ,
' title ' : ' ' ,
' duration ' : 7291 ,
' upload_date ' : ' 20140328 ' ,
' uploader_id ' : ' 223413403 ' ,
' timestamp ' : 1396018030 ,
} ,
' skip ' : ' Requires vk account credentials ' ,
} ,
@ -165,7 +177,7 @@ class VKIE(VKBaseIE):
' upload_date ' : ' 20140626 ' ,
' view_count ' : int ,
} ,
' skip ' : ' Only works from Russia ' ,
' skip ' : ' Removed ' ,
} ,
{
# video (removed?) only available with list id
@ -247,6 +259,9 @@ class VKIE(VKBaseIE):
' uploader_id ' : ' -387766 ' ,
' timestamp ' : 1475137527 ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
} ,
{
# live stream, hls and rtmp links, most likely already finished live
@ -288,80 +303,94 @@ class VKIE(VKBaseIE):
mobj = re . match ( self . _VALID_URL , url )
video_id = mobj . group ( ' videoid ' )
mv_data = { }
if video_id :
info_url = ' https://vk.com/al_video.php?act=show_inline&al=1&video= ' + video_id
data = {
' act ' : ' show_inline ' ,
' video ' : video_id ,
}
# Some videos (removed?) can only be downloaded with list id specified
list_id = mobj . group ( ' list_id ' )
if list_id :
info_url + = ' &list= %s ' % list_id
data [ ' list ' ] = list_id
payload = self . _download_payload ( ' al_video ' , video_id , data )
info_page = payload [ 1 ]
opts = payload [ - 1 ]
mv_data = opts . get ( ' mvData ' ) or { }
player = opts . get ( ' player ' ) or { }
else :
info_url = ' http://vk.com/video_ext.php? ' + mobj . group ( ' embed_query ' )
video_id = ' %s _ %s ' % ( mobj . group ( ' oid ' ) , mobj . group ( ' id ' ) )
info_page = self . _download_webpage ( info_url , video_id )
info_page = self . _download_webpage (
' http://vk.com/video_ext.php? ' + mobj . group ( ' embed_query ' ) , video_id )
error_message = self . _html_search_regex (
[ r ' (?s)<!><div[^>]+class= " video_layer_message " [^>]*>(.+?)</div> ' ,
r ' (?s)<div[^>]+id= " video_ext_msg " [^>]*>(.+?)</div> ' ] ,
info_page , ' error message ' , default = None )
if error_message :
raise ExtractorError ( error_message , expected = True )
error_message = self . _html_search_regex (
[ r ' (?s)<!><div[^>]+class= " video_layer_message " [^>]*>(.+?)</div> ' ,
r ' (?s)<div[^>]+id= " video_ext_msg " [^>]*>(.+?)</div> ' ] ,
info_page , ' error message ' , default = None )
if error_message :
raise ExtractorError ( error_message , expected = True )
if re . search ( r ' <!>/login \ .php \ ?.* \ bact=security_check ' , info_page ) :
raise ExtractorError (
' You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP. ' ,
expected = True )
if re . search ( r ' <!>/login \ .php \ ?.* \ bact=security_check ' , info_page ) :
raise ExtractorError (
' You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP. ' ,
expected = True )
ERROR_COPYRIGHT = ' Video %s has been removed from public access due to rightholder complaint. '
ERROR_COPYRIGHT = ' Video %s has been removed from public access due to rightholder complaint. '
ERRORS = {
r ' >Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.< ' :
ERROR_COPYRIGHT ,
ERRORS = {
r ' >Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.< ' :
ERROR_COPYRIGHT ,
r ' >The video .*? was removed from public access by request of the copyright holder.< ' :
ERROR_COPYRIGHT ,
r ' >The video .*? was removed from public access by request of the copyright holder.< ' :
ERROR_COPYRIGHT ,
r ' <!>Please log in or < ' :
' Video %s is only available for registered users, '
' use --username and --password options to provide account credentials. ' ,
r ' <!>Please log in or < ' :
' Video %s is only available for registered users, '
' use --username and --password options to provide account credentials. ' ,
r ' <!>Unknown error ' :
' Video %s does not exist. ' ,
r ' <!>Unknown error ' :
' Video %s does not exist. ' ,
r ' <!>Видео временно недоступно ' :
' Video %s is temporarily unavailable. ' ,
r ' <!>Видео временно недоступно ' :
' Video %s is temporarily unavailable. ' ,
r ' <!>Access denied ' :
' Access denied to video %s . ' ,
r ' <!>Access denied ' :
' Access denied to video %s . ' ,
r ' <!>Видеозапись недоступна, так как её автор был заблокирован. ' :
' Video %s is no longer available, because its author has been blocked. ' ,
r ' <!>Видеозапись недоступна, так как её автор был заблокирован. ' :
' Video %s is no longer available, because its author has been blocked. ' ,
r ' <!>This video is no longer available, because its author has been blocked. ' :
' Video %s is no longer available, because its author has been blocked. ' ,
r ' <!>This video is no longer available, because its author has been blocked. ' :
' Video %s is no longer available, because its author has been blocked. ' ,
r ' <!>This video is no longer available, because it has been deleted. ' :
' Video %s is no longer available, because it has been deleted. ' ,
r ' <!>This video is no longer available, because it has been deleted. ' :
' Video %s is no longer available, because it has been deleted. ' ,
r ' <!>The video .+? is not available in your region. ' :
' Video %s is not available in your region. ' ,
}
r ' <!>The video .+? is not available in your region. ' :
' Video %s is not available in your region. ' ,
}
for error_re , error_msg in ERRORS . items ( ) :
if re . search ( error_re , info_page ) :
raise ExtractorError ( error_msg % video_id , expected = True )
for error_re , error_msg in ERRORS . items ( ) :
if re . search ( error_re , info_page ) :
raise ExtractorError ( error_msg % video_id , expected = True )
player = self . _parse_json ( self . _search_regex (
r ' var \ s+playerParams \ s*= \ s*({.+?}) \ s*; \ s* \ n ' ,
info_page , ' player params ' ) , video_id )
youtube_url = YoutubeIE . _extract_url ( info_page )
if youtube_url :
return self . url_result ( youtube_url , ie = YoutubeIE . ie_key ( ) )
return self . url_result ( youtube_url , YoutubeIE . ie_key ( ) )
vimeo_url = VimeoIE . _extract_url ( url , info_page )
if vimeo_url is not None :
return self . url_result ( vimeo_url )
return self . url_result ( vimeo_url , VimeoIE . ie_key ( ) )
pladform_url = PladformIE . _extract_url ( info_page )
if pladform_url :
return self . url_result ( pladform_url )
return self . url_result ( pladform_url , PladformIE . ie_key ( ) )
m_rutube = re . search (
r ' \ ssrc= " ((?:https?:)?//rutube \ .ru \\ ?/(?:video|play) \\ ?/embed(?:.*?)) \\ ? " ' , info_page )
@ -374,6 +403,10 @@ class VKIE(VKBaseIE):
if dailymotion_urls :
return self . url_result ( dailymotion_urls [ 0 ] , DailymotionIE . ie_key ( ) )
odnoklassniki_url = OdnoklassnikiIE . _extract_url ( info_page )
if odnoklassniki_url :
return self . url_result ( odnoklassniki_url , OdnoklassnikiIE . ie_key ( ) )
m_opts = re . search ( r ' (?s)var \ s+opts \ s*= \ s*({.+?}); ' , info_page )
if m_opts :
m_opts_url = re . search ( r " url \ s*: \ s* ' ((?!/ \ b)[^ ' ]+) " , m_opts . group ( 1 ) )
@ -383,38 +416,7 @@ class VKIE(VKBaseIE):
opts_url = ' http: ' + opts_url
return self . url_result ( opts_url )
# vars does not look to be served anymore since 24.10.2016
data = self . _parse_json (
self . _search_regex (
r ' var \ s+vars \ s*= \ s*({.+?}); ' , info_page , ' vars ' , default = ' {} ' ) ,
video_id , fatal = False )
# <!json> is served instead
if not data :
data = self . _parse_json (
self . _search_regex (
[ r ' <!json> \ s*({.+?}) \ s*<!> ' , r ' <!json> \ s*({.+}) ' ] ,
info_page , ' json ' , default = ' {} ' ) ,
video_id )
if data :
data = data [ ' player ' ] [ ' params ' ] [ 0 ]
if not data :
data = self . _parse_json (
self . _search_regex (
r ' var \ s+playerParams \ s*= \ s*({.+?}) \ s*; \ s* \ n ' , info_page ,
' player params ' , default = ' {} ' ) ,
video_id )
if data :
data = data [ ' params ' ] [ 0 ]
# <!--{...}
if not data :
data = self . _parse_json (
self . _search_regex (
r ' <!-- \ s*({.+}) ' , info_page , ' payload ' ) ,
video_id ) [ ' payload ' ] [ - 1 ] [ - 1 ] [ ' player ' ] [ ' params ' ] [ 0 ]
data = player [ ' params ' ] [ 0 ]
title = unescapeHTML ( data [ ' md_title ' ] )
# 2 = live
@ -463,12 +465,12 @@ class VKIE(VKBaseIE):
' title ' : title ,
' thumbnail ' : data . get ( ' jpg ' ) ,
' uploader ' : data . get ( ' md_author ' ) ,
' uploader_id ' : str_or_none ( data . get ( ' author_id ' ) ) ,
' duration ' : data . get ( ' duration ' ) ,
' uploader_id ' : str_or_none ( data . get ( ' author_id ' ) or mv_data . get ( ' authorId ' ) ) ,
' duration ' : int_or_none ( data . get ( ' duration ' ) or mv_data . get ( ' duration ' ) ) ,
' timestamp ' : timestamp ,
' view_count ' : view_count ,
' like_count ' : int_or_none ( data . get ( ' liked ' ) ) ,
' dislike _count' : int_or_none ( data . get ( ' nolikes ' ) ) ,
' like_count ' : int_or_none ( mv_ data. get ( ' likes ' ) ) ,
' comment _count' : int_or_none ( mv_ data. get ( ' commcount ' ) ) ,
' is_live ' : is_live ,
}
@ -482,7 +484,6 @@ class VKUserVideosIE(VKBaseIE):
' url ' : ' http://vk.com/videos205387401 ' ,
' info_dict ' : {
' id ' : ' 205387401 ' ,
' title ' : " Tom Cruise ' s Videos " ,
} ,
' playlist_mincount ' : 4 ,
} , {
@ -498,22 +499,25 @@ class VKUserVideosIE(VKBaseIE):
' url ' : ' http://new.vk.com/videos205387401 ' ,
' only_matching ' : True ,
} ]
_VIDEO = collections . namedtuple (
' Video ' , [ ' owner_id ' , ' id ' , ' thumb ' , ' title ' , ' flags ' , ' duration ' , ' hash ' , ' moder_acts ' , ' owner ' , ' date ' , ' views ' , ' platform ' , ' blocked ' , ' music_video_meta ' ] )
def _real_extract ( self , url ) :
page_id = self . _match_id ( url )
webpage = self . _download_webpage ( url , page_id )
l = self . _download_payload ( ' al_video ' , page_id , {
' act ' : ' load_videos_silent ' ,
' oid ' : page_id ,
} ) [ 0 ] [ ' ' ] [ ' list ' ]
entries = [
self . url_result (
' http://vk.com/video ' + video_id , ' VK ' , video_id = video_id )
for video_id in orderedSet ( re . findall ( r ' href= " /video(-?[0-9_]+) " ' , webpage ) ) ]
title = unescapeHTML ( self . _search_regex (
r ' <title> \ s*([^<]+?) \ s+ \ | \ s+ \ d+ \ s+videos ' ,
webpage , ' title ' , default = page_id ) )
entries = [ ]
for video in l :
v = self . _VIDEO . _make ( video )
video_id = ' %d _ %d ' % ( v . owner_id , v . id )
entries . append ( self . url_result (
' http://vk.com/video ' + video_id , ' VK ' , video_id = video_id ) )
return self . playlist_result ( entries , page_id , title )
return self . playlist_result ( entries , page_id )
class VKWallPostIE ( VKBaseIE ) :
@ -523,15 +527,15 @@ class VKWallPostIE(VKBaseIE):
# public page URL, audio playlist
' url ' : ' https://vk.com/bs.official?w=wall-23538238_35 ' ,
' info_dict ' : {
' id ' : ' 23538238_35 ' ,
' title ' : ' Black Shadow - Wall post 23538238_35 ' ,
' id ' : ' - 23538238_35' ,
' title ' : ' Black Shadow - Wall post - 23538238_35 ' ,
' description ' : ' md5:3f84b9c4f9ef499731cf1ced9998cc0c ' ,
} ,
' playlist ' : [ {
' md5 ' : ' 5ba93864ec5b85f7ce19a9af4af080f6 ' ,
' info_dict ' : {
' id ' : ' 135220665_111806521 ' ,
' ext ' : ' mp3 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Black Shadow - Слепое Верование ' ,
' duration ' : 370 ,
' uploader ' : ' Black Shadow ' ,
@ -542,18 +546,16 @@ class VKWallPostIE(VKBaseIE):
' md5 ' : ' 4cc7e804579122b17ea95af7834c9233 ' ,
' info_dict ' : {
' id ' : ' 135220665_111802303 ' ,
' ext ' : ' mp3 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Black Shadow - Война - Негасимое Бездны Пламя! ' ,
' duration ' : 423 ,
' uploader ' : ' Black Shadow ' ,
' artist ' : ' Black Shadow ' ,
' track ' : ' Война - Негасимое Бездны Пламя! ' ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
} ] ,
' params ' : {
' skip_download ' : True ,
' usenetrc ' : True ,
} ,
' skip ' : ' Requires vk account credentials ' ,
@ -562,7 +564,7 @@ class VKWallPostIE(VKBaseIE):
' url ' : ' https://vk.com/wall85155021_6319 ' ,
' info_dict ' : {
' id ' : ' 85155021_6319 ' ,
' title ' : ' Sergey Gorbunov - Wall post 85155021_6319' ,
' title ' : ' Сергей Горбунов - Wall post 85155021_6319' ,
} ,
' playlist_count ' : 1 ,
' params ' : {
@ -578,58 +580,73 @@ class VKWallPostIE(VKBaseIE):
' url ' : ' https://m.vk.com/wall-23538238_35 ' ,
' only_matching ' : True ,
} ]
_BASE64_CHARS = ' abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/= '
_AUDIO = collections . namedtuple (
' Audio ' , [ ' id ' , ' owner_id ' , ' url ' , ' title ' , ' performer ' , ' duration ' , ' album_id ' , ' unk ' , ' author_link ' , ' lyrics ' , ' flags ' , ' context ' , ' extra ' , ' hashes ' , ' cover_url ' , ' ads ' , ' subtitle ' , ' main_artists ' , ' feat_artists ' , ' album ' , ' track_code ' , ' restriction ' , ' album_part ' , ' new_stats ' , ' access_key ' ] )
def _decode ( self , enc ) :
dec = ' '
e = n = 0
for c in enc :
r = self . _BASE64_CHARS . index ( c )
cond = n % 4
e = 64 * e + r if cond else r
n + = 1
if cond :
dec + = chr ( 255 & e >> ( - 2 * n & 6 ) )
return dec
def _unmask_url ( self , mask_url , vk_id ) :
if ' audio_api_unavailable ' in mask_url :
extra = mask_url . split ( ' ?extra= ' ) [ 1 ] . split ( ' # ' )
func , base = self . _decode ( extra [ 1 ] ) . split ( chr ( 11 ) )
assert ( func == ' i ' )
mask_url = list ( self . _decode ( extra [ 0 ] ) )
url_len = len ( mask_url )
indexes = [ None ] * url_len
index = int ( base ) ^ vk_id
for n in range ( url_len - 1 , - 1 , - 1 ) :
index = ( url_len * ( n + 1 ) ^ index + n ) % url_len
indexes [ n ] = index
for n in range ( 1 , url_len ) :
c = mask_url [ n ]
index = indexes [ url_len - 1 - n ]
mask_url [ n ] = mask_url [ index ]
mask_url [ index ] = c
mask_url = ' ' . join ( mask_url )
return mask_url
def _real_extract ( self , url ) :
post_id = self . _match_id ( url )
wall_url = ' https://vk.com/wall %s ' % post_id
post_id = remove_start ( post_id , ' - ' )
webpage = self . _download_webpage ( wall_url , post_id )
error = self . _html_search_regex (
r ' >Error</div> \ s*<div[^>]+class=[ " \' ]body[ " \' ][^>]*>([^<]+) ' ,
webpage , ' error ' , default = None )
if error :
raise ExtractorError ( ' VK said: %s ' % error , expected = True )
webpage = self . _download_payload ( ' wkview ' , post_id , {
' act ' : ' show ' ,
' w ' : ' wall ' + post_id ,
} ) [ 1 ]
description = clean_html ( get_element_by_class ( ' wall_post_text ' , webpage ) )
uploader = clean_html ( get_element_by_class ( ' author ' , webpage ) )
thumbnail = self . _og_search_thumbnail ( webpage )
entries = [ ]
audio_ids = re . findall ( r ' data-full-id=[ " \' ]( \ d+_ \ d+) ' , webpage )
if audio_ids :
al_audio = self . _download_webpage (
' https://vk.com/al_audio.php ' , post_id ,
note = ' Downloading audio info ' , fatal = False ,
data = urlencode_postdata ( {
' act ' : ' reload_audio ' ,
' al ' : ' 1 ' ,
' ids ' : ' , ' . join ( audio_ids )
} ) )
if al_audio :
Audio = collections . namedtuple (
' Audio ' , [ ' id ' , ' user_id ' , ' url ' , ' track ' , ' artist ' , ' duration ' ] )
audios = self . _parse_json (
self . _search_regex (
r ' <!json>(.+?)<!> ' , al_audio , ' audios ' , default = ' [] ' ) ,
post_id , fatal = False , transform_source = unescapeHTML )
if isinstance ( audios , list ) :
for audio in audios :
a = Audio . _make ( audio [ : 6 ] )
entries . append ( {
' id ' : ' %s _ %s ' % ( a . user_id , a . id ) ,
' url ' : a . url ,
' title ' : ' %s - %s ' % ( a . artist , a . track ) if a . artist and a . track else a . id ,
' thumbnail ' : thumbnail ,
' duration ' : a . duration ,
' uploader ' : uploader ,
' artist ' : a . artist ,
' track ' : a . track ,
} )
for audio in re . findall ( r ' data-audio= " ([^ " ]+) ' , webpage ) :
audio = self . _parse_json ( unescapeHTML ( audio ) , post_id )
a = self . _AUDIO . _make ( audio )
if not a . url :
continue
title = unescapeHTML ( a . title )
entries . append ( {
' id ' : ' %s _ %s ' % ( a . owner_id , a . id ) ,
' url ' : self . _unmask_url ( a . url , a . ads [ ' vk_id ' ] ) ,
' title ' : ' %s - %s ' % ( a . performer , title ) if a . performer else title ,
' thumbnail ' : a . cover_url . split ( ' , ' ) if a . cover_url else None ,
' duration ' : a . duration ,
' uploader ' : uploader ,
' artist ' : a . performer ,
' track ' : title ,
' ext ' : ' mp4 ' ,
' protocol ' : ' m3u8 ' ,
} )
for video in re . finditer (
r ' <a[^>]+href=([ " \' ])(?P<url>/video(?:-?[ \ d_]+).*?) \ 1 ' , webpage ) :