You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

238 lines
7.9 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import random
  4. import time
  5. import re
  6. from .common import InfoExtractor
  7. from ..utils import (
  8. strip_jsonp,
  9. unescapeHTML,
  10. js_to_json,
  11. )
  12. from ..compat import compat_urllib_request
  13. class QQMusicIE(InfoExtractor):
  14. IE_NAME = 'qqmusic'
  15. _VALID_URL = r'http://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'
  16. _TESTS = [{
  17. 'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD',
  18. 'md5': 'bed90b6db2a7a7a7e11bc585f471f63a',
  19. 'info_dict': {
  20. 'id': '004295Et37taLD',
  21. 'ext': 'm4a',
  22. 'title': '可惜没如果',
  23. 'upload_date': '20141227',
  24. 'creator': '林俊杰',
  25. 'description': 'md5:4348ff1dd24036906baa7b6f973f8d30',
  26. }
  27. }]
  28. # Reference: m_r_GetRUin() in top_player.js
  29. # http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js
  30. @staticmethod
  31. def m_r_get_ruin():
  32. curMs = int(time.time() * 1000) % 1000
  33. return int(round(random.random() * 2147483647) * curMs % 1E10)
  34. def _real_extract(self, url):
  35. mid = self._match_id(url)
  36. detail_info_page = self._download_webpage(
  37. 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=%s&play=0' % mid,
  38. mid, note='Download song detail info',
  39. errnote='Unable to get song detail info', encoding='gbk')
  40. song_name = self._html_search_regex(
  41. r"songname:\s*'([^']+)'", detail_info_page, 'song name')
  42. publish_time = self._html_search_regex(
  43. r'发行时间:(\d{4}-\d{2}-\d{2})', detail_info_page,
  44. 'publish time', default=None)
  45. if publish_time:
  46. publish_time = publish_time.replace('-', '')
  47. singer = self._html_search_regex(
  48. r"singer:\s*'([^']+)", detail_info_page, 'singer', default=None)
  49. lrc_content = self._html_search_regex(
  50. r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>',
  51. detail_info_page, 'LRC lyrics', default=None)
  52. guid = self.m_r_get_ruin()
  53. vkey = self._download_json(
  54. 'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid=%s' % guid,
  55. mid, note='Retrieve vkey', errnote='Unable to get vkey',
  56. transform_source=strip_jsonp)['key']
  57. song_url = 'http://cc.stream.qqmusic.qq.com/C200%s.m4a?vkey=%s&guid=%s&fromtag=0' % (mid, vkey, guid)
  58. return {
  59. 'id': mid,
  60. 'url': song_url,
  61. 'title': song_name,
  62. 'upload_date': publish_time,
  63. 'creator': singer,
  64. 'description': lrc_content,
  65. }
  66. class QQPlaylistBaseIE(InfoExtractor):
  67. @staticmethod
  68. def qq_static_url(category, mid):
  69. return 'http://y.qq.com/y/static/%s/%s/%s/%s.html' % (category, mid[-2], mid[-1], mid)
  70. @classmethod
  71. def get_entries_from_page(cls, page):
  72. entries = []
  73. for item in re.findall(r'class="data"[^<>]*>([^<>]+)</', page):
  74. song_mid = unescapeHTML(item).split('|')[-5]
  75. entries.append(cls.url_result(
  76. 'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic',
  77. song_mid))
  78. return entries
  79. class QQMusicSingerIE(QQPlaylistBaseIE):
  80. IE_NAME = 'qqmusic:singer'
  81. _VALID_URL = r'http://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)'
  82. _TEST = {
  83. 'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2',
  84. 'info_dict': {
  85. 'id': '001BLpXF2DyJe2',
  86. 'title': '林俊杰',
  87. 'description': 'md5:2a222d89ba4455a3af19940c0481bb78',
  88. },
  89. 'playlist_count': 12,
  90. }
  91. def _real_extract(self, url):
  92. mid = self._match_id(url)
  93. singer_page = self._download_webpage(
  94. self.qq_static_url('singer', mid), mid, 'Download singer page')
  95. entries = self.get_entries_from_page(singer_page)
  96. singer_name = self._html_search_regex(
  97. r"singername\s*:\s*'([^']+)'", singer_page, 'singer name',
  98. default=None)
  99. singer_id = self._html_search_regex(
  100. r"singerid\s*:\s*'([0-9]+)'", singer_page, 'singer id',
  101. default=None)
  102. singer_desc = None
  103. if singer_id:
  104. req = compat_urllib_request.Request(
  105. 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg?utf8=1&outCharset=utf-8&format=xml&singerid=%s' % singer_id)
  106. req.add_header(
  107. 'Referer', 'http://s.plcloud.music.qq.com/xhr_proxy_utf8.html')
  108. singer_desc_page = self._download_xml(
  109. req, mid, 'Donwload singer description XML')
  110. singer_desc = singer_desc_page.find('./data/info/desc').text
  111. return self.playlist_result(entries, mid, singer_name, singer_desc)
  112. class QQMusicAlbumIE(QQPlaylistBaseIE):
  113. IE_NAME = 'qqmusic:album'
  114. _VALID_URL = r'http://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)'
  115. _TEST = {
  116. 'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1&play=0',
  117. 'info_dict': {
  118. 'id': '000gXCTb2AhRR1',
  119. 'title': '我们都是这样长大的',
  120. 'description': 'md5:d216c55a2d4b3537fe4415b8767d74d6',
  121. },
  122. 'playlist_count': 4,
  123. }
  124. def _real_extract(self, url):
  125. mid = self._match_id(url)
  126. album_page = self._download_webpage(
  127. self.qq_static_url('album', mid), mid, 'Download album page')
  128. entries = self.get_entries_from_page(album_page)
  129. album_name = self._html_search_regex(
  130. r"albumname\s*:\s*'([^']+)',", album_page, 'album name',
  131. default=None)
  132. album_detail = self._html_search_regex(
  133. r'<div class="album_detail close_detail">\s*<p>((?:[^<>]+(?:<br />)?)+)</p>',
  134. album_page, 'album details', default=None)
  135. return self.playlist_result(entries, mid, album_name, album_detail)
  136. class QQMusicToplistIE(QQPlaylistBaseIE):
  137. IE_NAME = 'qqmusic:toplist'
  138. _VALID_URL = r'http://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)'
  139. _TESTS = [{
  140. 'url': 'http://y.qq.com/#type=toplist&p=global_12',
  141. 'info_dict': {
  142. 'id': 'global_12',
  143. 'title': 'itunes榜',
  144. },
  145. 'playlist_count': 10,
  146. }, {
  147. 'url': 'http://y.qq.com/#type=toplist&p=top_6',
  148. 'info_dict': {
  149. 'id': 'top_6',
  150. 'title': 'QQ音乐巅峰榜·欧美',
  151. },
  152. 'playlist_count': 100,
  153. }, {
  154. 'url': 'http://y.qq.com/#type=toplist&p=global_5',
  155. 'info_dict': {
  156. 'id': 'global_5',
  157. 'title': '韩国mnet排行榜',
  158. },
  159. 'playlist_count': 50,
  160. }]
  161. @staticmethod
  162. def strip_qq_jsonp(code):
  163. return js_to_json(re.sub(r'^MusicJsonCallback\((.*?)\)/\*.+?\*/$', r'\1', code))
  164. def _real_extract(self, url):
  165. list_id = self._match_id(url)
  166. list_type, num_id = list_id.split("_")
  167. list_page = self._download_webpage(
  168. "http://y.qq.com/y/static/toplist/index/%s.html" % list_id,
  169. list_id, 'Download toplist page')
  170. entries = []
  171. if list_type == 'top':
  172. jsonp_url = "http://y.qq.com/y/static/toplist/json/top/%s/1.js" % num_id
  173. else:
  174. jsonp_url = "http://y.qq.com/y/static/toplist/json/global/%s/1_1.js" % num_id
  175. toplist_json = self._download_json(
  176. jsonp_url, list_id, note='Retrieve toplist json',
  177. errnote='Unable to get toplist json', transform_source=self.strip_qq_jsonp)
  178. for song in toplist_json['l']:
  179. s = song['s']
  180. song_mid = s.split("|")[20]
  181. entries.append(self.url_result(
  182. 'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic',
  183. song_mid))
  184. list_name = self._html_search_regex(
  185. r'<h2 id="top_name">([^\']+)</h2>', list_page, 'top list name',
  186. default=None)
  187. return self.playlist_result(entries, list_id, list_name)