You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

206 lines
7.8 KiB

10 years ago
10 years ago
10 years ago
10 years ago
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. ExtractorError,
  7. int_or_none,
  8. parse_iso8601,
  9. )
  10. class MySpaceIE(InfoExtractor):
  11. _VALID_URL = r'https?://myspace\.com/([^/]+)/(?P<mediatype>video/[^/]+/|music/song/.*?)(?P<id>\d+)'
  12. _TESTS = [
  13. {
  14. 'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919',
  15. 'info_dict': {
  16. 'id': '109594919',
  17. 'ext': 'flv',
  18. 'title': 'Little Big Town',
  19. 'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
  20. 'uploader': 'Five Minutes to the Stage',
  21. 'uploader_id': 'fiveminutestothestage',
  22. 'timestamp': 1414108751,
  23. 'upload_date': '20141023',
  24. },
  25. 'params': {
  26. # rtmp download
  27. 'skip_download': True,
  28. },
  29. },
  30. # songs
  31. {
  32. 'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
  33. 'info_dict': {
  34. 'id': '93388656',
  35. 'ext': 'flv',
  36. 'title': 'Of weakened soul...',
  37. 'uploader': 'Killsorrow',
  38. 'uploader_id': 'killsorrow',
  39. },
  40. 'params': {
  41. # rtmp download
  42. 'skip_download': True,
  43. },
  44. }, {
  45. 'add_ie': ['Vevo'],
  46. 'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
  47. 'info_dict': {
  48. 'id': 'USZM20600099',
  49. 'ext': 'mp4',
  50. 'title': 'Animal I Have Become',
  51. 'uploader': 'Three Days Grace',
  52. 'timestamp': int,
  53. 'upload_date': '20060502',
  54. },
  55. 'skip': 'VEVO is only available in some countries',
  56. }, {
  57. 'add_ie': ['Youtube'],
  58. 'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
  59. 'info_dict': {
  60. 'id': 'ypWvQgnJrSU',
  61. 'ext': 'mp4',
  62. 'title': 'Starset - First Light',
  63. 'description': 'md5:2d5db6c9d11d527683bcda818d332414',
  64. 'uploader': 'Yumi K',
  65. 'uploader_id': 'SorenPromotions',
  66. 'upload_date': '20140725',
  67. }
  68. },
  69. ]
  70. def _real_extract(self, url):
  71. mobj = re.match(self._VALID_URL, url)
  72. video_id = mobj.group('id')
  73. webpage = self._download_webpage(url, video_id)
  74. player_url = self._search_regex(
  75. r'playerSwf":"([^"?]*)', webpage, 'player URL')
  76. def rtmp_format_from_stream_url(stream_url, width=None, height=None):
  77. rtmp_url, play_path = stream_url.split(';', 1)
  78. return {
  79. 'format_id': 'rtmp',
  80. 'url': rtmp_url,
  81. 'play_path': play_path,
  82. 'player_url': player_url,
  83. 'protocol': 'rtmp',
  84. 'ext': 'flv',
  85. 'width': width,
  86. 'height': height,
  87. }
  88. if mobj.group('mediatype').startswith('music/song'):
  89. # songs don't store any useful info in the 'context' variable
  90. song_data = self._search_regex(
  91. r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id,
  92. webpage, 'song_data', default=None, group=0)
  93. if song_data is None:
  94. # some songs in an album are not playable
  95. self.report_warning(
  96. '%s: No downloadable song on this page' % video_id)
  97. return
  98. def search_data(name):
  99. return self._search_regex(
  100. r'''data-%s=([\'"])(?P<data>.*?)\1''' % name,
  101. song_data, name, default='', group='data')
  102. stream_url = search_data('stream-url')
  103. if not stream_url:
  104. vevo_id = search_data('vevo-id')
  105. youtube_id = search_data('youtube-id')
  106. if vevo_id:
  107. self.to_screen('Vevo video detected: %s' % vevo_id)
  108. return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
  109. elif youtube_id:
  110. self.to_screen('Youtube video detected: %s' % youtube_id)
  111. return self.url_result(youtube_id, ie='Youtube')
  112. else:
  113. raise ExtractorError(
  114. 'Found song but don\'t know how to download it')
  115. return {
  116. 'id': video_id,
  117. 'title': self._og_search_title(webpage),
  118. 'uploader': search_data('artist-name'),
  119. 'uploader_id': search_data('artist-username'),
  120. 'thumbnail': self._og_search_thumbnail(webpage),
  121. 'duration': int_or_none(search_data('duration')),
  122. 'formats': [rtmp_format_from_stream_url(stream_url)]
  123. }
  124. else:
  125. video = self._parse_json(self._search_regex(
  126. r'context = ({.*?});', webpage, 'context'),
  127. video_id)['video']
  128. formats = []
  129. hls_stream_url = video.get('hlsStreamUrl')
  130. if hls_stream_url:
  131. formats.append({
  132. 'format_id': 'hls',
  133. 'url': hls_stream_url,
  134. 'protocol': 'm3u8_native',
  135. 'ext': 'mp4',
  136. })
  137. stream_url = video.get('streamUrl')
  138. if stream_url:
  139. formats.append(rtmp_format_from_stream_url(
  140. stream_url,
  141. int_or_none(video.get('width')),
  142. int_or_none(video.get('height'))))
  143. self._sort_formats(formats)
  144. return {
  145. 'id': video_id,
  146. 'title': video['title'],
  147. 'description': video.get('description'),
  148. 'thumbnail': video.get('imageUrl'),
  149. 'uploader': video.get('artistName'),
  150. 'uploader_id': video.get('artistUsername'),
  151. 'duration': int_or_none(video.get('duration')),
  152. 'timestamp': parse_iso8601(video.get('dateAdded')),
  153. 'formats': formats,
  154. }
  155. class MySpaceAlbumIE(InfoExtractor):
  156. IE_NAME = 'MySpace:album'
  157. _VALID_URL = r'https?://myspace\.com/([^/]+)/music/album/(?P<title>.*-)(?P<id>\d+)'
  158. _TESTS = [{
  159. 'url': 'https://myspace.com/starset2/music/album/transmissions-19455773',
  160. 'info_dict': {
  161. 'title': 'Transmissions',
  162. 'id': '19455773',
  163. },
  164. 'playlist_count': 14,
  165. 'skip': 'this album is only available in some countries',
  166. }, {
  167. 'url': 'https://myspace.com/killsorrow/music/album/the-demo-18596029',
  168. 'info_dict': {
  169. 'title': 'The Demo',
  170. 'id': '18596029',
  171. },
  172. 'playlist_count': 5,
  173. }]
  174. def _real_extract(self, url):
  175. mobj = re.match(self._VALID_URL, url)
  176. playlist_id = mobj.group('id')
  177. display_id = mobj.group('title') + playlist_id
  178. webpage = self._download_webpage(url, display_id)
  179. tracks_paths = re.findall(r'"music:song" content="(.*?)"', webpage)
  180. if not tracks_paths:
  181. raise ExtractorError(
  182. '%s: No songs found, try using proxy' % display_id,
  183. expected=True)
  184. entries = [
  185. self.url_result(t_path, ie=MySpaceIE.ie_key())
  186. for t_path in tracks_paths]
  187. return {
  188. '_type': 'playlist',
  189. 'id': playlist_id,
  190. 'display_id': display_id,
  191. 'title': self._og_search_title(webpage),
  192. 'entries': entries,
  193. }