Merge remote-tracking branch 'MikeCol/playvid_extract'

11 years ago · 777ac90791
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -175,6 +175,7 @@ from .ooyala import OoyalaIE
 from .orf import ORFIE
 from .pbs import PBSIE
 from .photobucket import PhotobucketIE
 from .playvid import PlayvidIE
 from .podomatic import PodomaticIE
 from .pornhd import PornHdIE
 from .pornhub import PornHubIE
--- a/youtube_dl/extractor/playvid.py
+++ b/youtube_dl/extractor/playvid.py
@@ -0,0 +1,85 @@
 from __future__ import unicode_literals

 import re

 from .common import InfoExtractor
 from ..utils import (
    compat_urllib_parse,
    determine_ext,
    ExtractorError,
 )

 class PlayvidIE(InfoExtractor):
    """Information extractor for watch pages on www.playvid.com.

    The watch page carries a URL-encoded ``flashvars`` attribute. Its
    ``video_vars[...]`` entries provide the title, duration, thumbnail and
    one download URL per offered resolution.
    """

    _VALID_URL = r'^(?:https?://)?www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(#|$)'
    _TEST = {
        'url': 'http://www.playvid.com/watch/agbDDi7WZTV',
        'file': 'agbDDi7WZTV.mp4',
        'md5': '44930f8afa616efdf9482daf4fe53e1e',
        'info_dict': {
            'title': 'Michelle Lewin in Miami Beach',
            'duration': 240,
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Download the watch page for *url* and build its info dict.

        Returns a dict with the keys ``id``, ``formats``, ``title``,
        ``thumbnail``, ``duration``, ``description`` and ``age_limit``.
        ``duration`` is an int when the page value parses as one, else None.

        Raises ExtractorError when the page exposes no video URL.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        self.report_extraction(video_id)

        video_title = None
        duration = None
        video_thumbnail = None
        formats = []

        # Most of the information is stored in the flashvars attribute.
        flashvars_match = re.search(r'flashvars="(.+?)"', webpage)

        if flashvars_match:
            flashvars = compat_urllib_parse.unquote(flashvars_match.group(1))
            for info in flashvars.split(r'&amp;'):
                videovars_match = re.match(
                    r'^video_vars\[(.+?)\]=(.+?)$', info)
                if not videovars_match:
                    continue
                key, val = videovars_match.groups()

                if key == 'title':
                    # Spaces are transmitted as '+' in the flashvars value.
                    video_title = val.replace('+', ' ')
                elif key == 'duration':
                    # Convert to a number; a non-numeric value is treated
                    # as "duration unknown" instead of aborting extraction.
                    try:
                        duration = int(val)
                    except ValueError:
                        duration = None
                elif key == 'big_thumb':
                    video_thumbnail = val

                # The non-greedy key group above stops at the first ']=',
                # so nested URL entries arrive with a key of the form
                # 'video_urls][<N>p...'.
                videourl_match = re.match(
                    r'^video_urls\]\[(?P<resolution>\d+)p', key)
                if videourl_match:
                    resolution = int(videourl_match.group('resolution'))
                    formats.append({
                        'resolution': resolution,  # 360, 480, ...
                        'ext': determine_ext(val),
                        'url': val,
                    })

        # Fatal error if no download URL was found.
        if not formats:
            raise ExtractorError('no video url found')

        # The title should be in the flashvars; if not, fall back to <title>.
        if video_title is None:
            video_title = self._html_search_regex(
                r'<title>(.*?)</title', webpage, 'title')

        return {
            'id': video_id,
            'formats': formats,
            'title': video_title,
            'thumbnail': video_thumbnail,
            'duration': duration,
            'description': None,
            'age_limit': 18,
        }