|
@@ -2,6 +2,7 @@
|
|
from __future__ import unicode_literals |
|
|
from __future__ import unicode_literals |
|
|
|
|
|
|
|
|
import hashlib |
|
|
import hashlib |
|
|
|
|
|
import itertools |
|
|
import math |
|
|
import math |
|
|
import os |
|
|
import os |
|
|
import random |
|
|
import random |
|
@@ -19,6 +20,7 @@ from ..compat import (
|
|
from ..utils import ( |
|
|
from ..utils import ( |
|
|
ExtractorError, |
|
|
ExtractorError, |
|
|
ohdave_rsa_encrypt, |
|
|
ohdave_rsa_encrypt, |
|
|
|
|
|
remove_start, |
|
|
sanitized_Request, |
|
|
sanitized_Request, |
|
|
urlencode_postdata, |
|
|
urlencode_postdata, |
|
|
url_basename, |
|
|
url_basename, |
|
@@ -295,6 +297,13 @@ class IqiyiIE(InfoExtractor):
|
|
}, |
|
|
}, |
|
|
}], |
|
|
}], |
|
|
'expected_warnings': ['Needs a VIP account for full video'], |
|
|
'expected_warnings': ['Needs a VIP account for full video'], |
|
|
|
|
|
}, { |
|
|
|
|
|
'url': 'http://www.iqiyi.com/a_19rrhb8ce1.html', |
|
|
|
|
|
'info_dict': { |
|
|
|
|
|
'id': '202918101', |
|
|
|
|
|
'title': '灌篮高手 国语版', |
|
|
|
|
|
}, |
|
|
|
|
|
'playlist_count': 101, |
|
|
}] |
|
|
}] |
|
|
|
|
|
|
|
|
_FORMATS_MAP = [ |
|
|
_FORMATS_MAP = [ |
|
@@ -526,9 +535,49 @@ class IqiyiIE(InfoExtractor):
|
|
enc_key = '6ab6d0280511493ba85594779759d4ed' |
|
|
enc_key = '6ab6d0280511493ba85594779759d4ed' |
|
|
return enc_key |
|
|
return enc_key |
|
|
|
|
|
|
|
|
|
|
|
def _extract_playlist(self, webpage):
    """Detect whether *webpage* is an album (playlist) page and extract it.

    Returns a playlist result containing one url_result entry per episode,
    or None when the page does not look like an album page (no episode
    links found), letting the caller fall back to single-video extraction.
    """
    PAGE_SIZE = 50

    # Episode links embedded directly in the album page (first page only).
    video_links = re.findall(
        r'<a[^>]+class="site-piclist_pic_link"[^>]+href="(http://www\.iqiyi\.com/.+\.html)"',
        webpage)
    if not video_links:
        # Not an album page - nothing to do here.
        return

    album_id = self._search_regex(
        r'albumId\s*:\s*(\d+),', webpage, 'album ID')
    album_title = self._search_regex(
        r'data-share-title="([^"]+)"', webpage, 'album title', fatal=False)

    entries = [self.url_result(link) for link in video_links]

    # Start from 2 because links in the first page are already on webpage
    page_num = 2
    while True:
        raw_page = self._download_webpage(
            'http://cache.video.qiyi.com/jp/avlist/%s/%d/%d/' % (album_id, page_num, PAGE_SIZE),
            album_id,
            note='Download playlist page %d' % page_num,
            errnote='Failed to download playlist page %d' % page_num)
        # The endpoint returns JSONP-like JS ("var tvInfoJs=..."); strip
        # the prefix before parsing the JSON payload.
        page_data = self._parse_json(
            remove_start(raw_page, 'var tvInfoJs='), album_id)
        video_list = page_data['data']['vlist']
        entries.extend(self.url_result(video['vurl']) for video in video_list)
        if len(video_list) < PAGE_SIZE:
            # A short page is the last one - stop paginating.
            break
        page_num += 1

    return self.playlist_result(entries, album_id, album_title)
|
|
|
|
|
|
|
|
def _real_extract(self, url): |
|
|
def _real_extract(self, url): |
|
|
webpage = self._download_webpage( |
|
|
webpage = self._download_webpage( |
|
|
url, 'temp_id', note='download video page') |
|
|
url, 'temp_id', note='download video page') |
|
|
|
|
|
|
|
|
|
|
|
# There's no simple way to determine whether an URL is a playlist or not |
|
|
|
|
|
# So detect it |
|
|
|
|
|
playlist_result = self._extract_playlist(webpage) |
|
|
|
|
|
if playlist_result: |
|
|
|
|
|
return playlist_result |
|
|
|
|
|
|
|
|
tvid = self._search_regex( |
|
|
tvid = self._search_regex( |
|
|
r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid') |
|
|
r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid') |
|
|
video_id = self._search_regex( |
|
|
video_id = self._search_regex( |
|
|