[hentaistigma] Add new extractor

10 years ago · 33c7ff861e
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -109,6 +109,7 @@ from .googleplus import GooglePlusIE
 from .googlesearch import GoogleSearchIE
 from .hark import HarkIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
 from .hotnewhiphop import HotNewHipHopIE
 from .howcast import HowcastIE
 from .huffpost import HuffPostIE
--- a/youtube_dl/extractor/hentaistigma.py
+++ b/youtube_dl/extractor/hentaistigma.py
@@ -0,0 +1,43 @@
 import re

 from .common import InfoExtractor

 class HentaiStigmaIE(InfoExtractor):
    """Extractor for video post pages hosted on hentai.animestigma.com."""

    # The first path segment after the host is captured as the video id.
    _VALID_URL = r'^https?://hentai\.animestigma\.com/(?P<videoid>[^/]+)'

    # Download test case: expected output file, its checksum and metadata.
    _TEST = {
        u'url': u'http://hentai.animestigma.com/inyouchuu-etsu-bonus/',
        u'file': u'inyouchuu-etsu-bonus.mp4',
        u'md5': u'4e3d07422a68a4cc363d8f57c8bf0d23',
        u'info_dict': {
            u"title": u"Inyouchuu Etsu Bonus",
            u"age_limit": 18,
        }
    }

    def _real_extract(self, url):
        """Resolve a post URL to its media URL and basic metadata.

        The post page is fetched first; it supplies the title and the
        address of an embedded iframe.  That iframe document is fetched in
        turn and the actual media URL is read from its ``clip`` settings.

        Returns a one-element list holding the info dictionary with the
        keys ``id``, ``url``, ``title``, ``format`` and ``age_limit``.
        """
        # The URL path segment doubles as the video identifier.
        video_id = re.match(self._VALID_URL, url).group('videoid')

        post_page = self._download_webpage(url, video_id)

        # Title is the link text inside the post's heading.
        raw_title = self._html_search_regex(
            r'<h2 class="posttitle"><a[^>]*>([^<]+)</a>',
            post_page, u'title')
        title = raw_title.strip()

        # The media URL is not on the post page itself; it sits in the
        # document loaded by the iframe whose src ends in "mp4".
        iframe_url = self._html_search_regex(
            r'<iframe src="([^"]+mp4)"', post_page, u'wrapper url')
        iframe_page = self._download_webpage(iframe_url, video_id)

        media_url = self._html_search_regex(
            r'clip:\s*{\s*url: "([^"]*)"', iframe_page, u'video url')

        return [{
            'id': video_id,
            'url': media_url,
            'title': title,
            'format': 'mp4',
            'age_limit': 18,
        }]