|
|
@ -1,38 +1,26 @@ |
|
|
|
# coding: utf-8 |
|
|
|
from __future__ import unicode_literals |
|
|
|
|
|
|
|
import re |
|
|
|
import hashlib |
|
|
|
import random |
|
|
|
|
|
|
|
from ..compat import compat_urlparse |
|
|
|
from ..compat import compat_str |
|
|
|
from .common import InfoExtractor |
|
|
|
from ..utils import parse_duration |
|
|
|
|
|
|
|
|
|
|
|
class JamendoBaseIE(InfoExtractor):
    """Base class holding the page-metadata helper shared by Jamendo extractors."""

    def _extract_meta(self, webpage, fatal=True):
        """Derive ``(title, artist, second)`` from the HTML of a Jamendo page.

        The title candidate is the Open Graph title, falling back to the
        contents of the ``<title>`` tag. A candidate is only kept when it
        carries the ``| Jamendo Music`` suffix, which is stripped; otherwise
        the ``name`` meta tag is used, with ``fatal`` forwarded to that
        lookup.

        The resulting title is then split on ``' - '``; when it does not
        contain that separator both extra values are ``None``.

        :param webpage: HTML source of the page.
        :param fatal: passed through to ``_html_search_meta`` for the last
            fallback (presumably controls whether a missing value raises;
            confirm against ``InfoExtractor``).
        :return: tuple ``(title, artist, second)``.
        """
        page_title = self._og_search_title(webpage, default=None)
        if not page_title:
            page_title = self._search_regex(
                r'<title>([^<]+)', webpage, 'title', default=None)

        if page_title:
            # Keep only the part in front of the site-name suffix; a title
            # without the suffix is discarded (becomes None).
            page_title = self._search_regex(
                r'(.+?)\s*\|\s*Jamendo Music', page_title, 'title',
                default=None)

        if not page_title:
            page_title = self._html_search_meta(
                'name', webpage, 'title', fatal=fatal)

        # The first group is greedy, so the split happens at the last ' - '.
        match = re.search(r'(.+) - (.+)', page_title or '')
        if match:
            artist, second = match.groups()
        else:
            artist = second = None

        return page_title, artist, second
|
|
|
|
|
|
|
|
|
|
|
class JamendoIE(JamendoBaseIE): |
|
|
|
from ..utils import ( |
|
|
|
clean_html, |
|
|
|
int_or_none, |
|
|
|
try_get, |
|
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
class JamendoIE(InfoExtractor): |
|
|
|
_VALID_URL = r'''(?x) |
|
|
|
https?:// |
|
|
|
(?: |
|
|
|
licensing\.jamendo\.com/[^/]+| |
|
|
|
(?:www\.)?jamendo\.com |
|
|
|
) |
|
|
|
/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+) |
|
|
|
/track/(?P<id>[0-9]+)(?:/(?P<display_id>[^/?#&]+))? |
|
|
|
''' |
|
|
|
_TESTS = [{ |
|
|
|
'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i', |
|
|
@ -45,7 +33,9 @@ class JamendoIE(JamendoBaseIE): |
|
|
|
'artist': 'Maya Filipič', |
|
|
|
'track': 'Stories from Emona I', |
|
|
|
'duration': 210, |
|
|
|
'thumbnail': r're:^https?://.*\.jpg' |
|
|
|
'thumbnail': r're:^https?://.*\.jpg', |
|
|
|
'timestamp': 1217438117, |
|
|
|
'upload_date': '20080730', |
|
|
|
} |
|
|
|
}, { |
|
|
|
'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock', |
|
|
@ -53,15 +43,19 @@ class JamendoIE(JamendoBaseIE): |
|
|
|
}] |
|
|
|
|
|
|
|
def _real_extract(self, url): |
|
|
|
mobj = self._VALID_URL_RE.match(url) |
|
|
|
track_id = mobj.group('id') |
|
|
|
display_id = mobj.group('display_id') |
|
|
|
|
|
|
|
webpage = self._download_webpage( |
|
|
|
'https://www.jamendo.com/track/%s/%s' % (track_id, display_id), |
|
|
|
display_id) |
|
|
|
|
|
|
|
title, artist, track = self._extract_meta(webpage) |
|
|
|
track_id, display_id = self._VALID_URL_RE.match(url).groups() |
|
|
|
webpage = self._download_webpage(url, track_id) |
|
|
|
models = self._parse_json(self._html_search_regex( |
|
|
|
r"data-bundled-models='([^']+)", |
|
|
|
webpage, 'bundled models'), track_id) |
|
|
|
track = models['track']['models'][0] |
|
|
|
title = track_name = track['name'] |
|
|
|
get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {} |
|
|
|
artist = get_model('artist') |
|
|
|
artist_name = artist.get('name') |
|
|
|
if artist_name: |
|
|
|
title = '%s - %s' % (artist_name, title) |
|
|
|
album = get_model('album') |
|
|
|
|
|
|
|
formats = [{ |
|
|
|
'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294' |
|
|
@ -77,31 +71,58 @@ class JamendoIE(JamendoBaseIE): |
|
|
|
))] |
|
|
|
self._sort_formats(formats) |
|
|
|
|
|
|
|
thumbnail = self._html_search_meta( |
|
|
|
'image', webpage, 'thumbnail', fatal=False) |
|
|
|
duration = parse_duration(self._search_regex( |
|
|
|
r'<span[^>]+itemprop=["\']duration["\'][^>]+content=["\'](.+?)["\']', |
|
|
|
webpage, 'duration', fatal=False)) |
|
|
|
urls = [] |
|
|
|
thumbnails = [] |
|
|
|
for _, covers in track.get('cover', {}).items(): |
|
|
|
for cover_id, cover_url in covers.items(): |
|
|
|
if not cover_url or cover_url in urls: |
|
|
|
continue |
|
|
|
urls.append(cover_url) |
|
|
|
size = int_or_none(cover_id.lstrip('size')) |
|
|
|
thumbnails.append({ |
|
|
|
'id': cover_id, |
|
|
|
'url': cover_url, |
|
|
|
'width': size, |
|
|
|
'height': size, |
|
|
|
}) |
|
|
|
|
|
|
|
tags = [] |
|
|
|
for tag in track.get('tags', []): |
|
|
|
tag_name = tag.get('name') |
|
|
|
if not tag_name: |
|
|
|
continue |
|
|
|
tags.append(tag_name) |
|
|
|
|
|
|
|
stats = track.get('stats') or {} |
|
|
|
|
|
|
|
return { |
|
|
|
'id': track_id, |
|
|
|
'display_id': display_id, |
|
|
|
'thumbnail': thumbnail, |
|
|
|
'thumbnails': thumbnails, |
|
|
|
'title': title, |
|
|
|
'duration': duration, |
|
|
|
'artist': artist, |
|
|
|
'track': track, |
|
|
|
'formats': formats |
|
|
|
'description': track.get('description'), |
|
|
|
'duration': int_or_none(track.get('duration')), |
|
|
|
'artist': artist_name, |
|
|
|
'track': track_name, |
|
|
|
'album': album.get('name'), |
|
|
|
'formats': formats, |
|
|
|
'license': '-'.join(track.get('licenseCC', [])) or None, |
|
|
|
'timestamp': int_or_none(track.get('dateCreated')), |
|
|
|
'view_count': int_or_none(stats.get('listenedAll')), |
|
|
|
'like_count': int_or_none(stats.get('favorited')), |
|
|
|
'average_rating': int_or_none(stats.get('averageNote')), |
|
|
|
'tags': tags, |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
class JamendoAlbumIE(JamendoBaseIE): |
|
|
|
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)/(?P<display_id>[\w-]+)' |
|
|
|
class JamendoAlbumIE(InfoExtractor): |
|
|
|
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)' |
|
|
|
_TEST = { |
|
|
|
'url': 'https://www.jamendo.com/album/121486/duck-on-cover', |
|
|
|
'info_dict': { |
|
|
|
'id': '121486', |
|
|
|
'title': 'Shearer - Duck On Cover' |
|
|
|
'title': 'Duck On Cover', |
|
|
|
'description': 'md5:c2920eaeef07d7af5b96d7c64daf1239', |
|
|
|
}, |
|
|
|
'playlist': [{ |
|
|
|
'md5': 'e1a2fcb42bda30dfac990212924149a8', |
|
|
@ -111,6 +132,8 @@ class JamendoAlbumIE(JamendoBaseIE): |
|
|
|
'title': 'Shearer - Warmachine', |
|
|
|
'artist': 'Shearer', |
|
|
|
'track': 'Warmachine', |
|
|
|
'timestamp': 1368089771, |
|
|
|
'upload_date': '20130509', |
|
|
|
} |
|
|
|
}, { |
|
|
|
'md5': '1f358d7b2f98edfe90fd55dac0799d50', |
|
|
@ -120,6 +143,8 @@ class JamendoAlbumIE(JamendoBaseIE): |
|
|
|
'title': 'Shearer - Without Your Ghost', |
|
|
|
'artist': 'Shearer', |
|
|
|
'track': 'Without Your Ghost', |
|
|
|
'timestamp': 1368089771, |
|
|
|
'upload_date': '20130509', |
|
|
|
} |
|
|
|
}], |
|
|
|
'params': { |
|
|
@ -127,24 +152,35 @@ class JamendoAlbumIE(JamendoBaseIE): |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
def _call_api(self, resource, resource_id):
    """Fetch one record of type *resource* from the Jamendo web API.

    The request goes to ``https://www.jamendo.com/api/<resource>s`` with
    ``resource_id`` passed as the ``id[]`` query parameter. Every call
    carries an ``X-Jam-Call`` header of the form ``$<sha1>*<rand>~``, where
    ``<rand>`` is a fresh random number rendered as a string and ``<sha1>``
    is the hex SHA-1 digest of the API path concatenated with it.

    :param resource: singular resource name, e.g. ``'album'``.
    :param resource_id: id of the requested record; also used as the id
        argument of ``_download_json``.
    :return: the first element of the decoded JSON response.
    """
    api_path = '/api/%ss' % resource
    nonce = compat_str(random.random())
    digest = hashlib.sha1((api_path + nonce).encode()).hexdigest()

    response = self._download_json(
        'https://www.jamendo.com' + api_path, resource_id,
        query={
            'id[]': resource_id,
        },
        headers={
            'X-Jam-Call': '$%s*%s~' % (digest, nonce)
        })
    # The response is indexed as a sequence; only its first item is used.
    return response[0]
|
|
|
|
|
|
|
def _real_extract(self, url):
    """Build a playlist result for a Jamendo album URL from the album API.

    The album id is taken from the URL via ``_match_id`` and the album
    record is fetched with ``_call_api('album', ...)``. Each track of the
    record that has an id becomes a ``url_transparent`` entry pointing at
    ``https://www.jamendo.com/track/<id>`` and handled by ``JamendoIE``,
    with the album name attached.

    The earlier page-scraping implementation that used to precede this
    logic is dropped: it returned first (leaving the API path unreachable)
    and relied on a ``display_id`` URL group and an ``_extract_meta``
    helper that the current class definition does not provide.

    :param url: album page URL matching ``_VALID_URL``.
    :return: the value of ``playlist_result`` called with the entries, the
        album id, the album name and the cleaned English description
        (``None`` is passed on when no English description string exists).
    """
    album_id = self._match_id(url)
    album = self._call_api('album', album_id)
    album_name = album.get('name')

    entries = []
    # ``or []`` also covers a "tracks" key that is present but null.
    for track in album.get('tracks') or []:
        track_id = track.get('id')
        if not track_id:
            # An entry without an id cannot be turned into a track URL.
            continue
        track_id = compat_str(track_id)
        entries.append({
            '_type': 'url_transparent',
            'url': 'https://www.jamendo.com/track/' + track_id,
            'ie_key': JamendoIE.ie_key(),
            'id': track_id,
            'album': album_name,
        })

    return self.playlist_result(
        entries, album_id, album_name,
        clean_html(try_get(album, lambda x: x['description']['en'], compat_str)))