[empflix] Add new extractor

11 years ago · 877bea9ce1
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -72,6 +72,7 @@ from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .eitb import EitbIE
 from .elpais import ElPaisIE
 from .empflix import EmpflixIE
 from .engadget import EngadgetIE
 from .escapist import EscapistIE
 from .everyonesmixtape import EveryonesMixtapeIE
--- a/youtube_dl/extractor/empflix.py
+++ b/youtube_dl/extractor/empflix.py
@@ -0,0 +1,46 @@
 import re

 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
 )

 class EmpflixIE(InfoExtractor):
    _VALID_URL = r'^https?://www\.empflix\.com/videos/(?P<videoid>[^\.]+)\.html'
    _TEST = {
        u'url': u'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
        u'file': u'Amateur-Finger-Fuck-33051.flv',
        u'md5': u'5e5cc160f38ca9857f318eb97146e13e',
        u'info_dict': {
            u"title": u"Amateur Finger Fuck",
            u"age_limit": 18,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)

        video_id = mobj.group('videoid')

        # Get webpage content
        webpage = self._download_webpage(url, video_id)

        age_limit = self._rta_search(webpage)

        # Get the video title
        video_title = self._html_search_regex(r'name="title" value="(?P<title>[^"]*)"',
            webpage, u'title').strip()

        cfg_url = self._html_search_regex(r'flashvars\.config = escape\("([^"]+)"',
            webpage, u'flashvars.config').strip()

        cfg_xml = self._download_xml(cfg_url, video_id, note=u'Downloading metadata')
        video_url = cfg_xml.find('videoLink').text

        info = {'id': video_id,
                'url': video_url,
                'title': video_title,
                'ext': 'flv',
                'age_limit': age_limit}

        return [info]