@ -12,34 +12,60 @@ from ..compat import (
)
from ..utils import (
determine_ext ,
unified_strdate ,
unified_timestamp ,
try_get ,
int_or_none ,
)
class RutubeIE ( InfoExtractor ) :
class RutubeBaseIE ( InfoExtractor ) :
def _extract_video ( self , video , video_id = None , require_title = True ) :
title = video [ ' title ' ] if require_title else video . get ( ' title ' )
age_limit = video . get ( ' is_adult ' )
if age_limit is not None :
age_limit = 18 if age_limit is True else 0
uploader_id = try_get ( video , lambda x : x [ ' author ' ] [ ' id ' ] )
category = try_get ( video , lambda x : x [ ' category ' ] [ ' name ' ] )
return {
' id ' : video . get ( ' id ' ) or video_id ,
' title ' : title ,
' description ' : video . get ( ' description ' ) ,
' thumbnail ' : video . get ( ' thumbnail_url ' ) ,
' duration ' : int_or_none ( video . get ( ' duration ' ) ) ,
' uploader ' : try_get ( video , lambda x : x [ ' author ' ] [ ' name ' ] ) ,
' uploader_id ' : compat_str ( uploader_id ) if uploader_id else None ,
' timestamp ' : unified_timestamp ( video . get ( ' created_ts ' ) ) ,
' category ' : [ category ] if category else None ,
' age_limit ' : age_limit ,
' view_count ' : int_or_none ( video . get ( ' hits ' ) ) ,
' comment_count ' : int_or_none ( video . get ( ' comments_count ' ) ) ,
' is_live ' : video . get ( ' is_livestream ' ) ,
}
class RutubeIE ( RutubeBaseIE ) :
IE_NAME = ' rutube '
IE_DESC = ' Rutube videos '
_VALID_URL = r ' https?://rutube \ .ru/(?:video|(?:play/)?embed)/(?P<id>[ \ da-z]{32}) '
_TESTS = [ {
' url ' : ' http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/ ' ,
' md5 ' : ' 79938ade01294ef7e27574890d0d3769 ' ,
' info_dict ' : {
' id ' : ' 3eac3b4561676c17df9132a9a1e62e3e ' ,
' ext ' : ' mp4 ' ,
' ext ' : ' flv ' ,
' title ' : ' Раненный кенгуру забежал в аптеку ' ,
' description ' : ' http://www.ntdtv.ru ' ,
' duration ' : 80 ,
' uploader ' : ' NTDRussian ' ,
' uploader_id ' : ' 29790 ' ,
' timestamp ' : 1381943602 ,
' upload_date ' : ' 20131016 ' ,
' age_limit ' : 0 ,
} ,
' params ' : {
# It requires ffmpeg (m3u8 download)
' skip_download ' : True ,
} ,
} , {
' url ' : ' http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661 ' ,
' only_matching ' : True ,
@ -49,20 +75,14 @@ class RutubeIE(InfoExtractor):
} , {
' url ' : ' http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252 ' ,
' only_matching ' : True ,
} , {
' url ' : ' https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source ' ,
' only_matching ' : True ,
} ]
@classmethod
def suitable ( cls , url ) :
parts = compat_urllib_parse_urlparse ( url )
params = compat_parse_qs ( parts . query )
# see if URL without parameters is OK
res = super ( RutubeIE , cls ) . suitable ( url )
if params : # we only allow pl_id parameter in the url
res = res and ' pl_id ' in params and len ( params ) == 1
return res
return False if RutubePlaylistIE . suitable ( url ) else super ( RutubeIE , cls ) . suitable ( url )
@staticmethod
def _extract_urls ( webpage ) :
@ -72,12 +92,12 @@ class RutubeIE(InfoExtractor):
def _real_extract ( self , url ) :
video_id = self . _match_id ( url )
video = self . _download_json (
' http://rutube.ru/api/video/ %s /?format=json ' % video_id ,
video_id , ' Downloading video JSON ' )
# Some videos don't have the author field
author = video . get ( ' author ' ) or { }
info = self . _extract_video ( video , video_id )
options = self . _download_json (
' http://rutube.ru/api/play/options/ %s /?format=json ' % video_id ,
@ -99,19 +119,8 @@ class RutubeIE(InfoExtractor):
} )
self . _sort_formats ( formats )
return {
' id ' : video [ ' id ' ] ,
' title ' : video [ ' title ' ] ,
' description ' : video [ ' description ' ] ,
' duration ' : video [ ' duration ' ] ,
' view_count ' : video [ ' hits ' ] ,
' formats ' : formats ,
' thumbnail ' : video [ ' thumbnail_url ' ] ,
' uploader ' : author . get ( ' name ' ) ,
' uploader_id ' : compat_str ( author [ ' id ' ] ) if author else None ,
' upload_date ' : unified_strdate ( video [ ' created_ts ' ] ) ,
' age_limit ' : 18 if video [ ' is_adult ' ] else 0 ,
}
info [ ' formats ' ] = formats
return info
class RutubeEmbedIE ( InfoExtractor ) :
@ -123,7 +132,8 @@ class RutubeEmbedIE(InfoExtractor):
' url ' : ' http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38= ' ,
' info_dict ' : {
' id ' : ' a10e53b86e8f349080f718582ce4c661 ' ,
' ext ' : ' mp4 ' ,
' ext ' : ' flv ' ,
' timestamp ' : 1387830582 ,
' upload_date ' : ' 20131223 ' ,
' uploader_id ' : ' 297833 ' ,
' description ' : ' Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89 ' ,
@ -131,7 +141,7 @@ class RutubeEmbedIE(InfoExtractor):
' title ' : ' Мистический городок Эйри в Индиан 5 серия озвучка subziro89 ' ,
} ,
' params ' : {
' skip_download ' : ' Requires ffmpeg ' ,
' skip_download ' : True ,
} ,
} , {
' url ' : ' http://rutube.ru/play/embed/8083783 ' ,
@ -145,10 +155,51 @@ class RutubeEmbedIE(InfoExtractor):
canonical_url = self . _html_search_regex (
r ' <link \ s+rel= " canonical " \ s+href= " ([^ " ]+?) " ' , webpage ,
' Canonical URL ' )
return self . url_result ( canonical_url , ' Rutube ' )
return self . url_result ( canonical_url , RutubeIE . ie_key ( ) )
class RutubeChannelIE ( InfoExtractor ) :
class RutubePlaylistBaseIE ( RutubeBaseIE ) :
def _next_page_url ( self , page_num , playlist_id , * args , * * kwargs ) :
return self . _PAGE_TEMPLATE % ( playlist_id , page_num )
def _entries ( self , playlist_id , * args , * * kwargs ) :
next_page_url = None
for pagenum in itertools . count ( 1 ) :
page = self . _download_json (
next_page_url or self . _next_page_url (
pagenum , playlist_id , * args , * * kwargs ) ,
playlist_id , ' Downloading page %s ' % pagenum )
results = page . get ( ' results ' )
if not results or not isinstance ( results , list ) :
break
for result in results :
video_url = result . get ( ' video_url ' )
if not video_url or not isinstance ( video_url , compat_str ) :
continue
entry = self . _extract_video ( result , require_title = False )
entry . update ( {
' _type ' : ' url ' ,
' url ' : video_url ,
' ie_key ' : RutubeIE . ie_key ( ) ,
} )
yield entry
next_page_url = page . get ( ' next ' )
if not next_page_url or not page . get ( ' has_next ' ) :
break
def _extract_playlist ( self , playlist_id , * args , * * kwargs ) :
return self . playlist_result (
self . _entries ( playlist_id , * args , * * kwargs ) ,
playlist_id , kwargs . get ( ' playlist_name ' ) )
def _real_extract ( self , url ) :
return self . _extract_playlist ( self . _match_id ( url ) )
class RutubeChannelIE ( RutubePlaylistBaseIE ) :
IE_NAME = ' rutube:channel '
IE_DESC = ' Rutube channels '
_VALID_URL = r ' https?://rutube \ .ru/tags/video/(?P<id> \ d+) '
@ -162,27 +213,8 @@ class RutubeChannelIE(InfoExtractor):
_PAGE_TEMPLATE = ' http://rutube.ru/api/tags/video/ %s /?page= %s &format=json '
def _extract_videos ( self , channel_id , channel_title = None ) :
entries = [ ]
for pagenum in itertools . count ( 1 ) :
page = self . _download_json (
self . _PAGE_TEMPLATE % ( channel_id , pagenum ) ,
channel_id , ' Downloading page %s ' % pagenum )
results = page [ ' results ' ]
if not results :
break
entries . extend ( self . url_result ( result [ ' video_url ' ] , ' Rutube ' ) for result in results )
if not page [ ' has_next ' ] :
break
return self . playlist_result ( entries , channel_id , channel_title )
def _real_extract ( self , url ) :
mobj = re . match ( self . _VALID_URL , url )
channel_id = mobj . group ( ' id ' )
return self . _extract_videos ( channel_id )
class RutubeMovieIE ( RutubeChannel IE ) :
class RutubeMovieIE ( RutubePlaylistBaseIE ) :
IE_NAME = ' rutube:movie '
IE_DESC = ' Rutube movies '
_VALID_URL = r ' https?://rutube \ .ru/metainfo/tv/(?P<id> \ d+) '
@ -196,11 +228,11 @@ class RutubeMovieIE(RutubeChannelIE):
movie = self . _download_json (
self . _MOVIE_TEMPLATE % movie_id , movie_id ,
' Downloading movie JSON ' )
movie_name = movie [ ' name ' ]
return self . _extract_videos ( movie_id , movie_name )
return self . _extract_playlist (
movie_id , playlist_name = movie . get ( ' name ' ) )
class RutubePersonIE ( RutubeChannel IE ) :
class RutubePersonIE ( RutubePlaylistBase IE ) :
IE_NAME = ' rutube:person '
IE_DESC = ' Rutube person videos '
_VALID_URL = r ' https?://rutube \ .ru/video/person/(?P<id> \ d+) '
@ -215,65 +247,33 @@ class RutubePersonIE(RutubeChannelIE):
_PAGE_TEMPLATE = ' http://rutube.ru/api/video/person/ %s /?page= %s &format=json '
class RutubePlaylistIE ( InfoExtractor ) :
class RutubePlaylistIE ( RutubePlaylistBaseIE ) :
IE_NAME = ' rutube:playlist '
IE_DESC = ' Rutube playlists '
_VALID_URL = r ' https?://rutube \ .ru/(?:video|(?:play/)?embed)/[ \ da-z]{32}/ \ ?.*? \ bpl_id=(?P<id> \ d+) '
_TESTS = [ {
' url ' : ' https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source ' ,
' url ' : ' https://rutube.ru/video/cecd58ed7d531fc0f3d795d51cee9026/?pl_id=3097&pl_type=tag ' ,
' info_dict ' : {
' id ' : ' 4252 ' ,
' id ' : ' 3097 ' ,
} ,
' playlist_count ' : 25 ,
' playlist_count ' : 27 ,
} , {
' url ' : ' https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source ' ,
' only_matching ' : True ,
} ]
_VALID_URL = r ' https?://rutube \ .ru/(?:video|(?:play/)?embed)/[ \ da-z]{32}/ \ ?(?:.+)?pl_id=(?P<id> \ d+) '
_PAGE_TEMPLATE = ' http://rutube.ru/api/playlist/source/ %s /?page= %s '
_PAGE_TEMPLATE = ' http://rutube.ru/api/playlist/ %s / %s /?page= %s &format=json '
@staticmethod
def suitable ( url ) :
params = compat_parse_qs ( compat_urllib_parse_urlparse ( url ) . query )
return params . get ( ' pl_id ' ) and int_or_none ( params [ ' pl_id ' ] [ 0 ] ) \
and params . get ( ' pl_type ' )
def _real_extract ( self , url ) :
playlist_id = self . _match_id ( url )
return self . _extract_playlist ( playlist_id )
def _extract_playlist ( self , playlist_id ) :
entries = [ ]
for pagenum in itertools . count ( 1 ) :
page_url = self . _PAGE_TEMPLATE % ( playlist_id , pagenum )
# download_json will sent an accept: application/xml header
page = self . _download_json ( page_url , playlist_id ,
" Downloading metadata for page %s " % pagenum ,
headers = { ' Accept ' : ' application/json ' } )
if not page [ ' results ' ] :
break
return params . get ( ' pl_type ' , [ None ] ) [ 0 ] and int_or_none ( params . get ( ' pl_id ' , [ None ] ) [ 0 ] )
results = page [ ' results ' ]
for result in results :
entry = self . url_result ( result . get ( ' video_url ' ) , ' Rutube ' )
category = try_get ( result , lambda x : x [ ' category ' ] [ ' name ' ] )
entry . update ( {
' id ' : result . get ( ' id ' ) ,
' uploader ' : try_get ( result , lambda x : x [ ' author ' ] [ ' name ' ] ) ,
' uploader_id ' : try_get ( result , lambda x : x [ ' author ' ] [ ' id ' ] ) ,
' upload_date ' : unified_strdate ( result . get ( ' created_ts ' ) ) ,
' title ' : result . get ( ' title ' ) ,
' description ' : result . get ( ' description ' ) ,
' thumbnail ' : result . get ( ' thumbnail_url ' ) ,
' duration ' : int_or_none ( result . get ( ' duration ' ) ) ,
' category ' : [ category ] if category else None ,
' age_limit ' : 18 if result . get ( ' is_adult ' ) else 0 ,
' view_count ' : int_or_none ( result . get ( ' hits ' ) ) ,
' is_live ' : result . get ( ' is_livestream ' ) ,
' webpage_url ' : result . get ( ' video_url ' ) ,
} )
entries . append ( entry )
def _next_page_url ( self , page_num , playlist_id , item_kind ) :
return self . _PAGE_TEMPLATE % ( item_kind , playlist_id , page_num )
if page [ ' has_next ' ] is False :
break
return self . playlist_result ( entries , playlist_id , page [ ' name ' ] )
def _real_extract ( self , url ) :
qs = compat_parse_qs ( compat_urllib_parse_urlparse ( url ) . query )
playlist_kind = qs [ ' pl_type ' ] [ 0 ]
playlist_id = qs [ ' pl_id ' ] [ 0 ]
return self . _extract_playlist ( playlist_id , item_kind = playlist_kind )