Browse Source

[motherless] Add support for groups

master-ytdl-org
Martin Weinelt 7 years ago
committed by Sergey M
parent
commit
45283afdec
2 changed files with 77 additions and 1 deletions
  1. +4
    -1
      youtube_dl/extractor/extractors.py
  2. +73
    -0
      youtube_dl/extractor/motherless.py

+ 4
- 1
youtube_dl/extractor/extractors.py View File

@ -609,7 +609,10 @@ from .mofosex import MofosexIE
from .mojvideo import MojvideoIE from .mojvideo import MojvideoIE
from .moniker import MonikerIE from .moniker import MonikerIE
from .morningstar import MorningstarIE from .morningstar import MorningstarIE
from .motherless import MotherlessIE
from .motherless import (
MotherlessIE,
MotherlessGroupIE
)
from .motorsport import MotorsportIE from .motorsport import MotorsportIE
from .movieclips import MovieClipsIE from .movieclips import MovieClipsIE
from .moviezine import MoviezineIE from .moviezine import MoviezineIE


+ 73
- 0
youtube_dl/extractor/motherless.py View File

@ -4,8 +4,11 @@ import datetime
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
InAdvancePagedList,
orderedSet,
str_to_int, str_to_int,
unified_strdate, unified_strdate,
) )
@ -114,3 +117,73 @@ class MotherlessIE(InfoExtractor):
'age_limit': age_limit, 'age_limit': age_limit,
'url': video_url, 'url': video_url,
} }
class MotherlessGroupIE(InfoExtractor):
_VALID_URL = 'https?://(?:www\.)?motherless\.com/gv?/(?P<id>[a-z0-9_]+)'
_TESTS = [{
'url': 'http://motherless.com/g/movie_scenes',
'info_dict': {
'id': 'movie_scenes',
'title': 'Movie Scenes',
'description': 'Hot and sexy scenes from "regular" movies... '
'Beautiful actresses fully nude... A looot of '
'skin! :)Enjoy!',
},
'playlist_mincount': 662,
}, {
'url': 'http://motherless.com/gv/sex_must_be_funny',
'info_dict': {
'id': 'sex_must_be_funny',
'title': 'Sex must be funny',
'description': 'Sex can be funny. Wide smiles,laugh, games, fun of '
'any kind!'
},
'playlist_mincount': 9,
}]
@classmethod
def suitable(cls, url):
return (False if MotherlessIE.suitable(url)
else super(MotherlessGroupIE, cls).suitable(url))
def _extract_entries(self, webpage, base):
return [
self.url_result(
compat_urlparse.urljoin(base, video_path),
MotherlessIE.ie_key(), video_title=title)
for video_path, title in orderedSet(re.findall(
r'href="/([^"]+)"[^>]+>\s+<img[^>]+alt="[^-]+-\s([^"]+)"',
webpage))
]
def _real_extract(self, url):
group_id = self._match_id(url)
page_url = compat_urlparse.urljoin(url, '/gv/%s' % group_id)
webpage = self._download_webpage(page_url, group_id)
title = self._search_regex(
r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
description = self._html_search_meta(
'description', webpage, fatal=False)
page_count = self._int(self._search_regex(
r'(\d+)</(?:a|span)><(?:a|span)[^>]+>\s*NEXT',
webpage, 'page_count'), 'page_count')
PAGE_SIZE = 80
def _get_page(idx):
webpage = self._download_webpage(
page_url, group_id, query={'page': idx + 1},
note='Downloading page %d/%d' % (idx + 1, page_count)
)
for entry in self._extract_entries(webpage, url):
yield entry
playlist = InAdvancePagedList(_get_page, page_count, PAGE_SIZE)
return {
'_type': 'playlist',
'id': group_id,
'title': title,
'description': description,
'entries': playlist
}

Loading…
Cancel
Save