# coding: utf-8
from __future__ import unicode_literals

import re
import time
import itertools

from .common import InfoExtractor
from .naver import NaverBaseIE
from ..compat import compat_str
from ..utils import (
    ExtractorError,
    merge_dicts,
    remove_start,
    try_get,
    urlencode_postdata,
)


class VLiveIE(NaverBaseIE):
    IE_NAME = 'vlive'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
    _NETRC_MACHINE = 'vlive'
    _TESTS = [{
        'url': 'http://www.vlive.tv/video/1326',
        'md5': 'cc7314812855ce56de70a06a27314983',
        'info_dict': {
            'id': '1326',
            'ext': 'mp4',
            'title': "[V LIVE] Girl's Day's Broadcast",
            'creator': "Girl's Day",
            'view_count': int,
            'uploader_id': 'muploader_a',
        },
    }, {
        'url': 'http://www.vlive.tv/video/16937',
        'info_dict': {
            'id': '16937',
            'ext': 'mp4',
            'title': '[V LIVE] 첸백시 걍방',
            'creator': 'EXO',
            'view_count': int,
            'subtitles': 'mincount:12',
            'uploader_id': 'muploader_j',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.vlive.tv/video/129100',
        'md5': 'ca2569453b79d66e5b919e5d308bff6b',
        'info_dict': {
            'id': '129100',
            'ext': 'mp4',
            'title': '[V LIVE] [BTS+] Run BTS! 2019 - EP.71 :: Behind the scene',
            'creator': 'BTS+',
            'view_count': int,
            'subtitles': 'mincount:10',
        },
        'skip': 'This video is only available for CH+ subscribers',
    }]
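
    # Playlist URLs (/video/<id>/playlist/<id>) also match _VALID_URL, so
    # defer them to VLivePlaylistIE instead of treating them as plain videos.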
    @classmethod
    def suitable(cls, url):
        return False if VLivePlaylistIE.suitable(url) else super(VLiveIE, cls).suitable(url)

    def _real_initialize(self):
        self._login()
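
    # Email/password login: prime the session cookies with a GET to the login
    # endpoint, POST the credentials, then verify success via the loginInfo
    # JSON. Credentials come from --username/--password or the 'vlive'
    # .netrc entry.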
    def _login(self):
        email, password = self._get_login_info()
        if None in (email, password):
            return

        def is_logged_in():
            login_info = self._download_json(
                'https://www.vlive.tv/auth/loginInfo', None,
                note='Downloading login info',
                headers={'Referer': 'https://www.vlive.tv/home'})
            return try_get(
                login_info, lambda x: x['message']['login'], bool) or False

        LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
        self._request_webpage(
            LOGIN_URL, None, note='Downloading login cookies')

        self._download_webpage(
            LOGIN_URL, None, note='Logging in',
            data=urlencode_postdata({'email': email, 'pwd': password}),
            headers={
                'Referer': LOGIN_URL,
                'Content-Type': 'application/x-www-form-urlencoded'
            })

        if not is_logged_in():
            raise ExtractorError('Unable to log in', expected=True)
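
    # The watch page embeds a vlive.video.init(...) call whose positional
    # arguments carry the broadcast status (params[2]), the long video id
    # (params[5]) and the play key (params[6]); the status decides whether
    # the entry is handled as a live stream, a replay (VOD) or an error.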
    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'https://www.vlive.tv/video/%s' % video_id, video_id)

        VIDEO_PARAMS_RE = r'\bvlive\.video\.init\(([^)]+)'
        VIDEO_PARAMS_FIELD = 'video params'

        params = self._parse_json(self._search_regex(
            VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD, default=''), video_id,
            transform_source=lambda s: '[' + s + ']', fatal=False)

        if not params or len(params) < 7:
            params = self._search_regex(
                VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD)
            params = [p.strip(r'"') for p in re.split(r'\s*,\s*', params)]

        status, long_video_id, key = params[2], params[5], params[6]
        status = remove_start(status, 'PRODUCT_')

        if status in ('LIVE_ON_AIR', 'BIG_EVENT_ON_AIR'):
            return self._live(video_id, webpage)
        elif status in ('VOD_ON_AIR', 'BIG_EVENT_INTRO'):
            return self._replay(video_id, webpage, long_video_id, key)

        if status == 'LIVE_END':
            raise ExtractorError('Uploading for replay. Please wait...',
                                 expected=True)
        elif status == 'COMING_SOON':
            raise ExtractorError('Coming soon!', expected=True)
        elif status == 'CANCELED':
            raise ExtractorError('We are sorry, '
                                 'but the live broadcast has been canceled.',
                                 expected=True)
        elif status == 'ONLY_APP':
            raise ExtractorError('Unsupported video type', expected=True)
        else:
            raise ExtractorError('Unknown status %s' % status)

    def _get_common_fields(self, webpage):
        title = self._og_search_title(webpage)
        creator = self._html_search_regex(
            r'<div[^>]+class="info_area"[^>]*>\s*(?:<em[^>]*>.*?</em\s*>\s*)?<a\s+[^>]*>([^<]+)',
            webpage, 'creator', fatal=False)
        thumbnail = self._og_search_thumbnail(webpage)
        return {
            'title': title,
            'creator': creator,
            'thumbnail': thumbnail,
        }
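
    # Live broadcasts: liveStreamInfo on the init page is a JSON string
    # embedded inside JSON (hence parsing it twice) and lists one HLS
    # playlist per resolution.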
    def _live(self, video_id, webpage):
        init_page = self._download_init_page(video_id)

        live_params = self._search_regex(
            r'"liveStreamInfo"\s*:\s*(".*"),',
            init_page, 'live stream info')
        live_params = self._parse_json(live_params, video_id)
        live_params = self._parse_json(live_params, video_id)

        formats = []
        for vid in live_params.get('resolutions', []):
            formats.extend(self._extract_m3u8_formats(
                vid['cdnUrl'], video_id, 'mp4',
                m3u8_id=vid.get('name'),
                fatal=False, live=True))
        self._sort_formats(formats)

        info = self._get_common_fields(webpage)
        info.update({
            'title': self._live_title(info['title']),
            'id': video_id,
            'formats': formats,
            'is_live': True,
        })
        return info
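
    # Replays (VOD): if the init call did not provide the long video id and
    # in-key, read them from oVideoStatus on the init page, then delegate to
    # _extract_video_info inherited from NaverBaseIE.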
    def _replay(self, video_id, webpage, long_video_id, key):
        if '' in (long_video_id, key):
            init_page = self._download_init_page(video_id)
            video_info = self._parse_json(self._search_regex(
                (r'(?s)oVideoStatus\s*=\s*({.+?})\s*</script',
                 r'(?s)oVideoStatus\s*=\s*({.+})'), init_page, 'video info'),
                video_id)
            if video_info.get('status') == 'NEED_CHANNEL_PLUS':
                self.raise_login_required(
                    'This video is only available for CH+ subscribers')
            long_video_id, key = video_info['vid'], video_info['inkey']

        return merge_dicts(
            self._get_common_fields(webpage),
            self._extract_video_info(video_id, long_video_id, key))

    def _download_init_page(self, video_id):
        return self._download_webpage(
            'https://www.vlive.tv/video/init/view',
            video_id, note='Downloading live webpage',
            data=urlencode_postdata({'videoSeq': video_id}),
            headers={
                'Referer': 'https://www.vlive.tv/video/%s' % video_id,
                'Content-Type': 'application/x-www-form-urlencoded'
            })


class VLiveChannelIE(InfoExtractor):
    IE_NAME = 'vlive:channel'
    _VALID_URL = r'https?://channels\.vlive\.tv/(?P<id>[0-9A-Z]+)'
    _TEST = {
        'url': 'http://channels.vlive.tv/FCD4B',
        'info_dict': {
            'id': 'FCD4B',
            'title': 'MAMAMOO',
        },
        'playlist_mincount': 110
    }
    _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
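
    # The channel code is resolved to a numeric channelSeq via the vfan API,
    # using the app id scraped from app.js when possible (falling back to the
    # bundled _APP_ID); the channel video list is then paged through 100
    # entries at a time.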
    def _real_extract(self, url):
        channel_code = self._match_id(url)

        webpage = self._download_webpage(
            'http://channels.vlive.tv/%s/video' % channel_code, channel_code)

        app_id = None

        app_js_url = self._search_regex(
            r'<script[^>]+src=(["\'])(?P<url>http.+?/app\.js.*?)\1',
            webpage, 'app js', default=None, group='url')

        if app_js_url:
            app_js = self._download_webpage(
                app_js_url, channel_code, 'Downloading app JS', fatal=False)
            if app_js:
                app_id = self._search_regex(
                    r'Global\.VFAN_APP_ID\s*=\s*[\'"]([^\'"]+)[\'"]',
                    app_js, 'app id', default=None)

        app_id = app_id or self._APP_ID

        channel_info = self._download_json(
            'http://api.vfan.vlive.tv/vproxy/channelplus/decodeChannelCode',
            channel_code, note='Downloading decode channel code',
            query={
                'app_id': app_id,
                'channelCode': channel_code,
                '_': int(time.time())
            })

        channel_seq = channel_info['result']['channelSeq']
        channel_name = None
        entries = []

        for page_num in itertools.count(1):
            video_list = self._download_json(
                'http://api.vfan.vlive.tv/vproxy/channelplus/getChannelVideoList',
                channel_code, note='Downloading channel list page #%d' % page_num,
                query={
                    'app_id': app_id,
                    'channelSeq': channel_seq,
                    # Large values of maxNumOfRows (~300 or above) may cause
                    # empty responses (see [1]), e.g. this happens for [2] that
                    # has more than 300 videos.
                    # 1. https://github.com/ytdl-org/youtube-dl/issues/13830
                    # 2. http://channels.vlive.tv/EDBF.
                    'maxNumOfRows': 100,
                    '_': int(time.time()),
                    'pageNo': page_num
                }
            )

            if not channel_name:
                channel_name = try_get(
                    video_list,
                    lambda x: x['result']['channelInfo']['channelName'],
                    compat_str)

            videos = try_get(
                video_list, lambda x: x['result']['videoList'], list)
            if not videos:
                break

            for video in videos:
                video_id = video.get('videoSeq')
                if not video_id:
                    continue
                video_id = compat_str(video_id)
                entries.append(
                    self.url_result(
                        'http://www.vlive.tv/video/%s' % video_id,
                        ie=VLiveIE.ie_key(), video_id=video_id))

        return self.playlist_result(
            entries, channel_code, channel_name)


class VLivePlaylistIE(InfoExtractor):
    IE_NAME = 'vlive:playlist'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)'
    _VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
    _TESTS = [{
        # regular working playlist
        'url': 'https://www.vlive.tv/video/117956/playlist/117963',
        'info_dict': {
            'id': '117963',
            'title': '아이돌룸(IDOL ROOM) 41회 - (여자)아이들'
        },
        'playlist_mincount': 10
    }, {
        # playlist with no playlistVideoSeqs
        'url': 'http://www.vlive.tv/video/22867/playlist/22912',
        'info_dict': {
            'id': '22867',
            'ext': 'mp4',
            'title': '[V LIVE] Valentine Day Message from MINA',
            'creator': 'TWICE',
            'view_count': int
        },
        'params': {
            'skip_download': True,
        }
    }]

    def _build_video_result(self, video_id, message):
        self.to_screen(message)
        return self.url_result(
            self._VIDEO_URL_TEMPLATE % video_id,
            ie=VLiveIE.ie_key(), video_id=video_id)
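
    # Playlist pages expose their items in a playlistVideoSeqs array; if it
    # is missing, or --no-playlist was given, fall back to extracting just
    # the single video from the URL.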
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id, playlist_id = mobj.group('video_id', 'id')

        if self._downloader.params.get('noplaylist'):
            return self._build_video_result(
                video_id,
                'Downloading just video %s because of --no-playlist'
                % video_id)

        self.to_screen(
            'Downloading playlist %s - add --no-playlist to just download video'
            % playlist_id)

        webpage = self._download_webpage(
            'http://www.vlive.tv/video/%s/playlist/%s'
            % (video_id, playlist_id), playlist_id)

        raw_item_ids = self._search_regex(
            r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage,
            'playlist video seqs', default=None, fatal=False)
        if not raw_item_ids:
            return self._build_video_result(
                video_id,
                'Downloading just video %s because no playlist was found'
                % video_id)

        item_ids = self._parse_json(raw_item_ids, playlist_id)

        entries = [
            self.url_result(
                self._VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(),
                video_id=compat_str(item_id))
            for item_id in item_ids]

        playlist_name = self._html_search_regex(
            r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)',
            webpage, 'playlist title', fatal=False)

        return self.playlist_result(entries, playlist_id, playlist_name)