[thesixtyone] Add new extractor (closes #3781)

10 years ago · 5e69192ef7
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -371,6 +371,7 @@ from .tenplay import TenPlayIE
 from .testurl import TestURLIE
 from .tf1 import TF1IE
 from .theplatform import ThePlatformIE
 from .thesixtyone import TheSixtyOneIE
 from .thisav import ThisAVIE
 from .tinypic import TinyPicIE
 from .tlc import TlcIE, TlcDeIE
--- a/youtube_dl/extractor/thesixtyone.py
+++ b/youtube_dl/extractor/thesixtyone.py
@@ -0,0 +1,100 @@
 # coding: utf-8
 from __future__ import unicode_literals

 import json
 import re

 from .common import InfoExtractor
 from ..utils import unified_strdate


 class TheSixtyOneIE(InfoExtractor):
    """Information extractor for individual songs on thesixtyone.com.

    The song page is downloaded and the JSON object keyed by the song id is
    read from it.  The audio URL is built from that object's ``audio_server``
    field and from its ``key`` field after the characters of ``key`` have been
    substituted through ``_DECODE_MAP`` and reversed.
    """

    # Accepts /s/<id>, /song/<id> and /song/comments/list/<id>, optionally
    # preceded by any number of path segments (this is what lets the
    # "#/s/<id>" style URLs in _TESTS match).
    # The leading segments are matched with ``[^/\n]*/`` rather than ``.*?/``:
    # both accept exactly the same strings, but the former cannot split the
    # same text in more than one way, so a non-matching URL with many slashes
    # no longer triggers exponential backtracking.
    _VALID_URL = r'''(?x)https?://(?:www\.)?thesixtyone\.com/
        (?:[^/\n]*/)*
        (?:
            s|
            song/comments/list|
            song
        )/(?P<id>[A-Za-z0-9]+)/?$'''
    # Canonical song page; positional field 0 is the song id.
    _SONG_URL_TEMPLATE = 'http://thesixtyone.com/s/{0:}'
    # Positional field 0 is the decoded key; ``audio_server`` is taken from
    # the song's JSON data.
    _SONG_FILE_URL_TEMPLATE = 'http://{audio_server:}.thesixtyone.com/thesixtyone_production/audio/{0:}_stream'
    # ``photo_base_url`` is taken from the song's JSON data.
    _THUMBNAIL_URL_TEMPLATE = '{photo_base_url:}_desktop'
    _TESTS = [
        {
            'url': 'http://www.thesixtyone.com/s/SrE3zD7s1jt/',
            'md5': '821cc43b0530d3222e3e2b70bb4622ea',
            'info_dict': {
                'id': 'SrE3zD7s1jt',
                'ext': 'mp3',
                'title': 'CASIO - Unicorn War Mixtape',
                'thumbnail': 're:^https?://.*_desktop$',
                'upload_date': '20071217',
                'duration': 3208,
            }
        },
        {
            'url': 'http://www.thesixtyone.com/song/comments/list/SrE3zD7s1jt',
            'only_matching': True,
        },
        {
            'url': 'http://www.thesixtyone.com/s/ULoiyjuJWli#/s/SrE3zD7s1jt/',
            'only_matching': True,
        },
        {
            'url': 'http://www.thesixtyone.com/#/s/SrE3zD7s1jt/',
            'only_matching': True,
        },
        {
            'url': 'http://www.thesixtyone.com/song/SrE3zD7s1jt/',
            'only_matching': True,
        },
    ]

    # Per-character substitution applied to the song's ``key`` field before it
    # is reversed and inserted into the audio URL.  Every value is a
    # hexadecimal digit; characters without an entry are passed through as-is.
    _DECODE_MAP = {
        "x": "a",
        "m": "b",
        "w": "c",
        "q": "d",
        "n": "e",
        "p": "f",
        "a": "0",
        "h": "1",
        "e": "2",
        "u": "3",
        "s": "4",
        "i": "5",
        "o": "6",
        "y": "7",
        "r": "8",
        "c": "9"
    }

    def _real_extract(self, url):
        """Return the info dict for the song addressed by ``url``.

        Raises ``KeyError`` when the song data lacks one of the fields needed
        for the title or the audio URL (``artist``, ``name``, ``key``,
        ``audio_server``).  A missing ``photo_base_url`` only results in a
        ``None`` thumbnail.
        """
        mobj = re.match(self._VALID_URL, url)
        song_id = mobj.group('id')

        # Always fetch the canonical /s/<id> page, whichever URL form was given.
        webpage = self._download_webpage(
            self._SONG_URL_TEMPLATE.format(song_id), song_id)

        # The page embeds the song metadata as  "<song_id>": {...}
        song_data = json.loads(self._search_regex(
            r'"%s":\s(\{.*?\})' % song_id, webpage, 'song_data'))

        # Substitute every character of the key, then reverse the result.
        decoded_key = "".join(
            self._DECODE_MAP.get(char, char)
            for char in reversed(song_data['key']))
        audio_url = self._SONG_FILE_URL_TEMPLATE.format(
            decoded_key, **song_data)

        formats = [{
            'format_id': 'sd',
            'url': audio_url,
            'ext': 'mp3',
        }]

        # The thumbnail is optional metadata: do not abort the extraction (or
        # emit a bogus "None_desktop" URL) when the base URL is absent.
        photo_base_url = song_data.get('photo_base_url')
        if photo_base_url:
            thumbnail = self._THUMBNAIL_URL_TEMPLATE.format(
                photo_base_url=photo_base_url)
        else:
            thumbnail = None

        return {
            'id': song_id,
            'title': '{artist:} - {name:}'.format(**song_data),
            'formats': formats,
            'comment_count': song_data.get('comments_count'),
            'duration': song_data.get('play_time'),
            'like_count': song_data.get('score'),
            'thumbnail': thumbnail,
            'upload_date': unified_strdate(song_data.get('publish_date')),
        }