@ -5,19 +5,27 @@ import hashlib
import re
import re
from .common import InfoExtractor
from .common import InfoExtractor
from ..compat import compat_parse_qs
from ..compat import (
compat_parse_qs ,
compat_urlparse ,
)
from ..utils import (
from ..utils import (
ExtractorError ,
int_or_none ,
int_or_none ,
float_or_none ,
float_or_none ,
parse_iso8601 ,
smuggle_url ,
strip_jsonp ,
unified_timestamp ,
unified_timestamp ,
unsmuggle_url ,
urlencode_postdata ,
urlencode_postdata ,
)
)
class BiliBiliIE ( InfoExtractor ) :
class BiliBiliIE ( InfoExtractor ) :
_VALID_URL = r ' https?://(?:www \ .|bangumi \ .|)bilibili \ .(?:tv|com)/(?:video/av|anime/v/ )(?P<id> \ d+) '
_VALID_URL = r ' https?://(?:www \ .|bangumi \ .|)bilibili \ .(?:tv|com)/(?:video/av|anime/(?P<anime_id> \ d+)/play# )(?P<id> \ d+) '
_TEST = {
_TESTS = [ {
' url ' : ' http://www.bilibili.tv/video/av1074402/ ' ,
' url ' : ' http://www.bilibili.tv/video/av1074402/ ' ,
' md5 ' : ' 9fa226fe2b8a9a4d5a69b4c6a183417e ' ,
' md5 ' : ' 9fa226fe2b8a9a4d5a69b4c6a183417e ' ,
' info_dict ' : {
' info_dict ' : {
@ -32,25 +40,61 @@ class BiliBiliIE(InfoExtractor):
' uploader ' : ' 菊子桑 ' ,
' uploader ' : ' 菊子桑 ' ,
' uploader_id ' : ' 156160 ' ,
' uploader_id ' : ' 156160 ' ,
} ,
} ,
}
} , {
# Tested in BiliBiliBangumiIE
' url ' : ' http://bangumi.bilibili.com/anime/1869/play#40062 ' ,
' only_matching ' : True ,
} , {
' url ' : ' http://bangumi.bilibili.com/anime/5802/play#100643 ' ,
' md5 ' : ' 3f721ad1e75030cc06faf73587cfec57 ' ,
' info_dict ' : {
' id ' : ' 100643 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' CHAOS;CHILD ' ,
' description ' : ' 如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中... ' ,
} ,
' skip ' : ' Geo-restricted to China ' ,
} ]
_APP_KEY = ' 84956560bc028eb7 '
_APP_KEY = ' 84956560bc028eb7 '
_BILIBILI_KEY = ' 94aba54af9065f71de72f5508f1cd42e '
_BILIBILI_KEY = ' 94aba54af9065f71de72f5508f1cd42e '
def _report_error ( self , result ) :
if ' message ' in result :
raise ExtractorError ( ' %s said: %s ' % ( self . IE_NAME , result [ ' message ' ] ) , expected = True )
elif ' code ' in result :
raise ExtractorError ( ' %s returns error %d ' % ( self . IE_NAME , result [ ' code ' ] ) , expected = True )
else :
raise ExtractorError ( ' Can \' t extract Bangumi episode ID ' )
def _real_extract ( self , url ) :
def _real_extract ( self , url ) :
video_id = self . _match_id ( url )
url , smuggled_data = unsmuggle_url ( url , { } )
mobj = re . match ( self . _VALID_URL , url )
video_id = mobj . group ( ' id ' )
anime_id = mobj . group ( ' anime_id ' )
webpage = self . _download_webpage ( url , video_id )
webpage = self . _download_webpage ( url , video_id )
if ' anime/v ' not in url :
if ' anime/ ' not in url :
cid = compat_parse_qs ( self . _search_regex (
cid = compat_parse_qs ( self . _search_regex (
[ r ' EmbedPlayer \ ([^)]+, \ s* " ([^ " ]+) " \ ) ' ,
[ r ' EmbedPlayer \ ([^)]+, \ s* " ([^ " ]+) " \ ) ' ,
r ' <iframe[^>]+src= " https://secure \ .bilibili \ .com/secure,([^ " ]+) " ' ] ,
r ' <iframe[^>]+src= " https://secure \ .bilibili \ .com/secure,([^ " ]+) " ' ] ,
webpage , ' player parameters ' ) ) [ ' cid ' ] [ 0 ]
webpage , ' player parameters ' ) ) [ ' cid ' ] [ 0 ]
else :
else :
if ' no_bangumi_tip ' not in smuggled_data :
self . to_screen ( ' Downloading episode %s . To download all videos in anime %s , re-run youtube-dl with %s ' % (
video_id , anime_id , compat_urlparse . urljoin ( url , ' //bangumi.bilibili.com/anime/ %s ' % anime_id ) ) )
headers = {
' Content-Type ' : ' application/x-www-form-urlencoded; charset=UTF-8 ' ,
}
headers . update ( self . geo_verification_headers ( ) )
js = self . _download_json (
js = self . _download_json (
' http://bangumi.bilibili.com/web_api/get_source ' , video_id ,
' http://bangumi.bilibili.com/web_api/get_source ' , video_id ,
data = urlencode_postdata ( { ' episode_id ' : video_id } ) ,
data = urlencode_postdata ( { ' episode_id ' : video_id } ) ,
headers = { ' Content-Type ' : ' application/x-www-form-urlencoded; charset=UTF-8 ' } )
headers = headers )
if ' result ' not in js :
self . _report_error ( js )
cid = js [ ' result ' ] [ ' cid ' ]
cid = js [ ' result ' ] [ ' cid ' ]
payload = ' appkey= %s &cid= %s &otype=json&quality=2&type=mp4 ' % ( self . _APP_KEY , cid )
payload = ' appkey= %s &cid= %s &otype=json&quality=2&type=mp4 ' % ( self . _APP_KEY , cid )
@ -58,7 +102,11 @@ class BiliBiliIE(InfoExtractor):
video_info = self . _download_json (
video_info = self . _download_json (
' http://interface.bilibili.com/playurl? %s &sign= %s ' % ( payload , sign ) ,
' http://interface.bilibili.com/playurl? %s &sign= %s ' % ( payload , sign ) ,
video_id , note = ' Downloading video info page ' )
video_id , note = ' Downloading video info page ' ,
headers = self . geo_verification_headers ( ) )
if ' durl ' not in video_info :
self . _report_error ( video_info )
entries = [ ]
entries = [ ]
@ -85,7 +133,7 @@ class BiliBiliIE(InfoExtractor):
title = self . _html_search_regex ( ' <h1[^>]+title= " ([^ " ]+) " > ' , webpage , ' title ' )
title = self . _html_search_regex ( ' <h1[^>]+title= " ([^ " ]+) " > ' , webpage , ' title ' )
description = self . _html_search_meta ( ' description ' , webpage )
description = self . _html_search_meta ( ' description ' , webpage )
timestamp = unified_timestamp ( self . _html_search_regex (
timestamp = unified_timestamp ( self . _html_search_regex (
r ' <time[^>]+datetime= " ([^ " ]+) " ' , webpage , ' upload time ' , fatal = Fals e) )
r ' <time[^>]+datetime= " ([^ " ]+) " ' , webpage , ' upload time ' , default = Non e) )
thumbnail = self . _html_search_meta ( [ ' og:image ' , ' thumbnailUrl ' ] , webpage )
thumbnail = self . _html_search_meta ( [ ' og:image ' , ' thumbnailUrl ' ] , webpage )
# TODO 'view_count' requires deobfuscating Javascript
# TODO 'view_count' requires deobfuscating Javascript
@ -99,7 +147,7 @@ class BiliBiliIE(InfoExtractor):
}
}
uploader_mobj = re . search (
uploader_mobj = re . search (
r ' <a[^>]+href= " https?://space \ .bilibili \ .com/(?P<id> \ d+) " [^>]+title= " (?P<name>[^ " ]+) " ' ,
r ' <a[^>]+href= " (?: https?:)? //space\ .bilibili \ .com/(?P<id> \ d+) " [^>]+title= " (?P<name>[^ " ]+) " ' ,
webpage )
webpage )
if uploader_mobj :
if uploader_mobj :
info . update ( {
info . update ( {
@ -123,3 +171,70 @@ class BiliBiliIE(InfoExtractor):
' description ' : description ,
' description ' : description ,
' entries ' : entries ,
' entries ' : entries ,
}
}
class BiliBiliBangumiIE ( InfoExtractor ) :
_VALID_URL = r ' https?://bangumi \ .bilibili \ .com/anime/(?P<id> \ d+) '
IE_NAME = ' bangumi.bilibili.com '
IE_DESC = ' BiliBili番剧 '
_TESTS = [ {
' url ' : ' http://bangumi.bilibili.com/anime/1869 ' ,
' info_dict ' : {
' id ' : ' 1869 ' ,
' title ' : ' 混沌武士 ' ,
' description ' : ' md5:6a9622b911565794c11f25f81d6a97d2 ' ,
} ,
' playlist_count ' : 26 ,
} , {
' url ' : ' http://bangumi.bilibili.com/anime/1869 ' ,
' info_dict ' : {
' id ' : ' 1869 ' ,
' title ' : ' 混沌武士 ' ,
' description ' : ' md5:6a9622b911565794c11f25f81d6a97d2 ' ,
} ,
' playlist ' : [ {
' md5 ' : ' 91da8621454dd58316851c27c68b0c13 ' ,
' info_dict ' : {
' id ' : ' 40062 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' 混沌武士 ' ,
' description ' : ' 故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子... ' ,
' timestamp ' : 1414538739 ,
' upload_date ' : ' 20141028 ' ,
' episode ' : ' 疾风怒涛 Tempestuous Temperaments ' ,
' episode_number ' : 1 ,
} ,
} ] ,
' params ' : {
' playlist_items ' : ' 1 ' ,
} ,
} ]
@classmethod
def suitable ( cls , url ) :
return False if BiliBiliIE . suitable ( url ) else super ( BiliBiliBangumiIE , cls ) . suitable ( url )
def _real_extract ( self , url ) :
bangumi_id = self . _match_id ( url )
# Sometimes this API returns a JSONP response
season_info = self . _download_json (
' http://bangumi.bilibili.com/jsonp/seasoninfo/ %s .ver ' % bangumi_id ,
bangumi_id , transform_source = strip_jsonp ) [ ' result ' ]
entries = [ {
' _type ' : ' url_transparent ' ,
' url ' : smuggle_url ( episode [ ' webplay_url ' ] , { ' no_bangumi_tip ' : 1 } ) ,
' ie_key ' : BiliBiliIE . ie_key ( ) ,
' timestamp ' : parse_iso8601 ( episode . get ( ' update_time ' ) , delimiter = ' ' ) ,
' episode ' : episode . get ( ' index_title ' ) ,
' episode_number ' : int_or_none ( episode . get ( ' index ' ) ) ,
} for episode in season_info [ ' episodes ' ] ]
entries = sorted ( entries , key = lambda entry : entry . get ( ' episode_number ' ) )
return self . playlist_result (
entries , bangumi_id ,
season_info . get ( ' bangumi_title ' ) , season_info . get ( ' evaluate ' ) )