You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

263 lines
9.0 KiB

9 years ago
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. import time
  5. import itertools
  6. from .common import InfoExtractor
  7. from ..compat import (
  8. compat_urllib_parse_urlencode,
  9. compat_str,
  10. )
  11. from ..utils import (
  12. dict_get,
  13. ExtractorError,
  14. float_or_none,
  15. int_or_none,
  16. remove_start,
  17. try_get,
  18. urlencode_postdata,
  19. )
  20. class VLiveIE(InfoExtractor):
  21. IE_NAME = 'vlive'
  22. _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
  23. _TESTS = [{
  24. 'url': 'http://www.vlive.tv/video/1326',
  25. 'md5': 'cc7314812855ce56de70a06a27314983',
  26. 'info_dict': {
  27. 'id': '1326',
  28. 'ext': 'mp4',
  29. 'title': "[V LIVE] Girl's Day's Broadcast",
  30. 'creator': "Girl's Day",
  31. 'view_count': int,
  32. },
  33. }, {
  34. 'url': 'http://www.vlive.tv/video/16937',
  35. 'info_dict': {
  36. 'id': '16937',
  37. 'ext': 'mp4',
  38. 'title': '[V LIVE] 첸백시 걍방',
  39. 'creator': 'EXO',
  40. 'view_count': int,
  41. 'subtitles': 'mincount:12',
  42. },
  43. 'params': {
  44. 'skip_download': True,
  45. },
  46. }]
  47. def _real_extract(self, url):
  48. video_id = self._match_id(url)
  49. webpage = self._download_webpage(
  50. 'http://www.vlive.tv/video/%s' % video_id, video_id)
  51. VIDEO_PARAMS_RE = r'\bvlive\.video\.init\(([^)]+)'
  52. VIDEO_PARAMS_FIELD = 'video params'
  53. params = self._parse_json(self._search_regex(
  54. VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD, default=''), video_id,
  55. transform_source=lambda s: '[' + s + ']', fatal=False)
  56. if not params or len(params) < 7:
  57. params = self._search_regex(
  58. VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD)
  59. params = [p.strip(r'"') for p in re.split(r'\s*,\s*', params)]
  60. status, long_video_id, key = params[2], params[5], params[6]
  61. status = remove_start(status, 'PRODUCT_')
  62. if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR':
  63. return self._live(video_id, webpage)
  64. elif status == 'VOD_ON_AIR' or status == 'BIG_EVENT_INTRO':
  65. if long_video_id and key:
  66. return self._replay(video_id, webpage, long_video_id, key)
  67. else:
  68. status = 'COMING_SOON'
  69. if status == 'LIVE_END':
  70. raise ExtractorError('Uploading for replay. Please wait...',
  71. expected=True)
  72. elif status == 'COMING_SOON':
  73. raise ExtractorError('Coming soon!', expected=True)
  74. elif status == 'CANCELED':
  75. raise ExtractorError('We are sorry, '
  76. 'but the live broadcast has been canceled.',
  77. expected=True)
  78. else:
  79. raise ExtractorError('Unknown status %s' % status)
  80. def _get_common_fields(self, webpage):
  81. title = self._og_search_title(webpage)
  82. creator = self._html_search_regex(
  83. r'<div[^>]+class="info_area"[^>]*>\s*<a\s+[^>]*>([^<]+)',
  84. webpage, 'creator', fatal=False)
  85. thumbnail = self._og_search_thumbnail(webpage)
  86. return {
  87. 'title': title,
  88. 'creator': creator,
  89. 'thumbnail': thumbnail,
  90. }
  91. def _live(self, video_id, webpage):
  92. init_page = self._download_webpage(
  93. 'http://www.vlive.tv/video/init/view',
  94. video_id, note='Downloading live webpage',
  95. data=urlencode_postdata({'videoSeq': video_id}),
  96. headers={
  97. 'Referer': 'http://www.vlive.tv/video/%s' % video_id,
  98. 'Content-Type': 'application/x-www-form-urlencoded'
  99. })
  100. live_params = self._search_regex(
  101. r'"liveStreamInfo"\s*:\s*(".*"),',
  102. init_page, 'live stream info')
  103. live_params = self._parse_json(live_params, video_id)
  104. live_params = self._parse_json(live_params, video_id)
  105. formats = []
  106. for vid in live_params.get('resolutions', []):
  107. formats.extend(self._extract_m3u8_formats(
  108. vid['cdnUrl'], video_id, 'mp4',
  109. m3u8_id=vid.get('name'),
  110. fatal=False, live=True))
  111. self._sort_formats(formats)
  112. info = self._get_common_fields(webpage)
  113. info.update({
  114. 'title': self._live_title(info['title']),
  115. 'id': video_id,
  116. 'formats': formats,
  117. 'is_live': True,
  118. })
  119. return info
  120. def _replay(self, video_id, webpage, long_video_id, key):
  121. playinfo = self._download_json(
  122. 'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?%s'
  123. % compat_urllib_parse_urlencode({
  124. 'videoId': long_video_id,
  125. 'key': key,
  126. 'ptc': 'http',
  127. 'doct': 'json', # document type (xml or json)
  128. 'cpt': 'vtt', # captions type (vtt or ttml)
  129. }), video_id)
  130. formats = [{
  131. 'url': vid['source'],
  132. 'format_id': vid.get('encodingOption', {}).get('name'),
  133. 'abr': float_or_none(vid.get('bitrate', {}).get('audio')),
  134. 'vbr': float_or_none(vid.get('bitrate', {}).get('video')),
  135. 'width': int_or_none(vid.get('encodingOption', {}).get('width')),
  136. 'height': int_or_none(vid.get('encodingOption', {}).get('height')),
  137. 'filesize': int_or_none(vid.get('size')),
  138. } for vid in playinfo.get('videos', {}).get('list', []) if vid.get('source')]
  139. self._sort_formats(formats)
  140. view_count = int_or_none(playinfo.get('meta', {}).get('count'))
  141. subtitles = {}
  142. for caption in playinfo.get('captions', {}).get('list', []):
  143. lang = dict_get(caption, ('locale', 'language', 'country', 'label'))
  144. if lang and caption.get('source'):
  145. subtitles[lang] = [{
  146. 'ext': 'vtt',
  147. 'url': caption['source']}]
  148. info = self._get_common_fields(webpage)
  149. info.update({
  150. 'id': video_id,
  151. 'formats': formats,
  152. 'view_count': view_count,
  153. 'subtitles': subtitles,
  154. })
  155. return info
  156. class VLiveChannelIE(InfoExtractor):
  157. IE_NAME = 'vlive:channel'
  158. _VALID_URL = r'https?://channels\.vlive\.tv/(?P<id>[0-9A-Z]+)'
  159. _TEST = {
  160. 'url': 'http://channels.vlive.tv/FCD4B',
  161. 'info_dict': {
  162. 'id': 'FCD4B',
  163. 'title': 'MAMAMOO',
  164. },
  165. 'playlist_mincount': 110
  166. }
  167. _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
  168. def _real_extract(self, url):
  169. channel_code = self._match_id(url)
  170. webpage = self._download_webpage(
  171. 'http://channels.vlive.tv/%s/video' % channel_code, channel_code)
  172. app_id = None
  173. app_js_url = self._search_regex(
  174. r'<script[^>]+src=(["\'])(?P<url>http.+?/app\.js.*?)\1',
  175. webpage, 'app js', default=None, group='url')
  176. if app_js_url:
  177. app_js = self._download_webpage(
  178. app_js_url, channel_code, 'Downloading app JS', fatal=False)
  179. if app_js:
  180. app_id = self._search_regex(
  181. r'Global\.VFAN_APP_ID\s*=\s*[\'"]([^\'"]+)[\'"]',
  182. app_js, 'app id', default=None)
  183. app_id = app_id or self._APP_ID
  184. channel_info = self._download_json(
  185. 'http://api.vfan.vlive.tv/vproxy/channelplus/decodeChannelCode',
  186. channel_code, note='Downloading decode channel code',
  187. query={
  188. 'app_id': app_id,
  189. 'channelCode': channel_code,
  190. '_': int(time.time())
  191. })
  192. channel_seq = channel_info['result']['channelSeq']
  193. channel_name = None
  194. entries = []
  195. for page_num in itertools.count(1):
  196. video_list = self._download_json(
  197. 'http://api.vfan.vlive.tv/vproxy/channelplus/getChannelVideoList',
  198. channel_code, note='Downloading channel list page #%d' % page_num,
  199. query={
  200. 'app_id': app_id,
  201. 'channelSeq': channel_seq,
  202. 'maxNumOfRows': 1000,
  203. '_': int(time.time()),
  204. 'pageNo': page_num
  205. }
  206. )
  207. if not channel_name:
  208. channel_name = try_get(
  209. video_list,
  210. lambda x: x['result']['channelInfo']['channelName'],
  211. compat_str)
  212. videos = try_get(
  213. video_list, lambda x: x['result']['videoList'], list)
  214. if not videos:
  215. break
  216. for video in videos:
  217. video_id = video.get('videoSeq')
  218. if not video_id:
  219. continue
  220. video_id = compat_str(video_id)
  221. entries.append(
  222. self.url_result(
  223. 'http://www.vlive.tv/video/%s' % video_id,
  224. ie=VLiveIE.ie_key(), video_id=video_id))
  225. return self.playlist_result(
  226. entries, channel_code, channel_name)