# NOTE(review): stray unified-diff hunk header ("@@ -1,104 +1,111 @@") removed;
# this file was reconstructed from patch text.
# encoding: utf-8
from __future__ import unicode_literals
import re
import itertools
from .common import InfoExtractor
from ..compat import (
compat_HTTPError ,
compat_urlparse ,
)
from ..utils import (
ExtractorError ,
clean_html ,
determine_ext ,
int_or_none ,
parse_iso8601 ,
)
class DramaFeverIE(InfoExtractor):
    """Extract a single DramaFever episode.

    URLs look like /drama/<series_id>/<episode_number>/<slug>; the site's
    feeds identify the same episode as "<series_id>.<episode_number>".
    """
    IE_NAME = 'dramafever'
    _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+/[0-9]+)'
    _TEST = {
        'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/',
        'info_dict': {
            'id': '4512.1',
            'ext': 'flv',
            'title': 'Cooking with Shin 4512.1',
            'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1404336058,
            'upload_date': '20140702',
            'duration': 343,
        }
    }

    def _real_extract(self, url):
        # Convert the URL form "series/episode" into the feed GUID form
        # "series.episode".
        video_id = self._match_id(url).replace('/', '.')

        try:
            feed = self._download_json(
                'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id,
                video_id, 'Downloading episode JSON')['channel']['item']
        except ExtractorError as e:
            # An HTTP error from the feed endpoint is how the site signals
            # geo-restriction; surface it as an expected, user-facing error.
            if isinstance(e.cause, compat_HTTPError):
                raise ExtractorError(
                    'Currently unavailable in your country.', expected=True)
            raise

        media_group = feed.get('media-group', {})

        formats = []
        for media_content in media_group['media-content']:
            src = media_content.get('@attributes', {}).get('url')
            if not src:
                continue
            ext = determine_ext(src)
            if ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    src, video_id, f4m_id='hds'))
            elif ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    src, video_id, 'mp4', m3u8_id='hls'))
            else:
                formats.append({
                    'url': src,
                })
        self._sort_formats(formats)

        title = media_group.get('media-title')
        description = media_group.get('media-description')
        duration = int_or_none(
            media_group['media-content'][0].get('@attributes', {}).get('duration'))
        thumbnail = self._proto_relative_url(
            media_group.get('media-thumbnail', {}).get('@attributes', {}).get('url'))
        # pubDate is close to ISO 8601 but uses a space between date and time,
        # hence the explicit ' ' delimiter for parse_iso8601.
        timestamp = parse_iso8601(feed.get('pubDate'), ' ')

        subtitles = {}
        for media_subtitle in media_group.get('media-subTitle', []):
            lang = media_subtitle.get('@attributes', {}).get('lang')
            href = media_subtitle.get('@attributes', {}).get('href')
            if not lang or not href:
                continue
            subtitles[lang] = [{
                'ext': 'ttml',
                'url': href,
            }]

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }
class DramaFeverSeriesIE(InfoExtractor):
    """Playlist extractor: yields every episode of a DramaFever series.

    Episodes are listed via the site's JSON API, which requires a consumer
    secret ("cs") scraped from the site's main.js (with a hard-coded
    fallback), and is paginated.
    """
    IE_NAME = 'dramafever:series'
    _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+)(?:/(?:(?!\d).+)?)?$'
    _TESTS = [{
        'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/',
        'info_dict': {
            'id': '4512',
            'title': 'Cooking with Shin',
            'description': 'md5:84a3f26e3cdc3fb7f500211b3593b5c1',
        },
        'playlist_count': 4,
    }, {
        'url': 'http://www.dramafever.com/drama/124/IRIS/',
        'info_dict': {
            'id': '124',
            'title': 'IRIS',
            'description': 'md5:b3a30e587cf20c59bd1c01ec0ee1b862',
        },
        'playlist_count': 20,
    }]

    # Fallback consumer secret, used when main.js cannot be fetched/parsed.
    _CONSUMER_SECRET = 'DA59dtVXYLxajktV'
    _PAGE_SIZE = 60  # max is 60 (see http://api.drama9.com/#get--api-4-episode-series-)

    def _get_consumer_secret(self, video_id):
        # Best-effort scrape of the API consumer secret from the site's
        # main.js; any failure falls back to the known-good constant.
        mainjs = self._download_webpage(
            'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js',
            video_id, 'Downloading main.js', fatal=False)
        if not mainjs:
            return self._CONSUMER_SECRET
        return self._search_regex(
            r"var\s+cs\s*=\s*'([^']+)'", mainjs,
            'consumer secret', default=self._CONSUMER_SECRET)

    def _real_extract(self, url):
        series_id = self._match_id(url)

        consumer_secret = self._get_consumer_secret(series_id)

        series = self._download_json(
            'http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s'
            % (consumer_secret, series_id),
            series_id, 'Downloading series JSON')['series'][series_id]

        title = clean_html(series['name'])
        description = clean_html(series.get('description') or series.get('description_short'))

        entries = []
        # Walk the paginated episode listing until the API says we reached
        # the last page (num_pages).
        for page_num in itertools.count(1):
            episodes = self._download_json(
                'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d'
                % (consumer_secret, series_id, self._PAGE_SIZE, page_num),
                series_id, 'Downloading episodes JSON page #%d' % page_num)
            for episode in episodes.get('value', []):
                entries.append(self.url_result(
                    compat_urlparse.urljoin(url, episode['episode_url']),
                    'DramaFever', episode.get('guid')))
            if page_num == episodes['num_pages']:
                break

        return self.playlist_result(entries, series_id, title, description)