@ -1,58 +1,25 @@ | |||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
import re | |||||
from .tnaflix import TNAFlixIE | |||||
from .common import InfoExtractor | |||||
from ..utils import fix_xml_ampersands | |||||
class EMPFlixIE(TNAFlixIE):
    # EMPFlix extractor. It defines no methods of its own: all behaviour is
    # inherited from TNAFlixIE, and only the URL pattern and the page-scraping
    # regexes below are overridden.

    # Captures the slug as `display_id` and the trailing number as `id`.
    _VALID_URL = r'^https?://www\.empflix\.com/videos/(?P<display_id>[0-9a-zA-Z-]+)-(?P<id>[0-9]+)\.html'

    # Title and description are read from `name="..." value="..."` attributes.
    _TITLE_REGEX = r'name="title" value="(?P<title>[^"]*)"'
    _DESCRIPTION_REGEX = r'name="description" value="([^"]*)"'

    # Captures the URL passed to escape() in the `flashvars.config` assignment.
    _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
class EmpflixIE(InfoExtractor):
    """Extract video metadata and format URLs from empflix.com video pages."""

    # The slug in front of the numeric id is exposed as `display_id`; it matches
    # exactly what the previously unnamed `.*?` matched, so the set of accepted
    # URLs is unchanged.
    _VALID_URL = r'^https?://www\.empflix\.com/videos/(?P<display_id>.*?)-(?P<id>[0-9]+)\.html'

    _TEST = {
        'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
        'md5': 'b1bc15b6412d33902d6e5952035fcabc',
        'info_dict': {
            'id': '33051',
            'display_id': 'Amateur-Finger-Fuck',
            'ext': 'mp4',
            'title': 'Amateur Finger Fuck',
            'description': 'Amateur solo finger fucking.',
            # Raw string: `\.` is a regex escape, not a string escape.
            'thumbnail': r're:https?://.*\.jpg$',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video page at *url*.

        The page is scraped for title, description and the URL of an XML
        config document; that document supplies the thumbnail and one
        format entry per ``./quality/item`` element.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, video_id)
        age_limit = self._rta_search(webpage)

        video_title = self._html_search_regex(
            r'name="title" value="(?P<title>[^"]*)"', webpage, 'title')
        # The description is optional (fatal=False).
        video_description = self._html_search_regex(
            r'name="description" value="([^"]*)"', webpage, 'description', fatal=False)

        cfg_url = self._html_search_regex(
            r'flashvars\.config = escape\("([^"]+)"',
            webpage, 'flashvars.config')

        cfg_xml = self._download_xml(
            cfg_url, video_id, note='Downloading metadata',
            transform_source=fix_xml_ampersands)

        formats = [
            {
                'url': item.find('videoLink').text,
                'format_id': item.find('res').text,
            } for item in cfg_xml.findall('./quality/item')
        ]

        # The thumbnail is optional metadata: a config document without a
        # <startThumb> element must not abort the whole extraction.
        thumb_el = cfg_xml.find('./startThumb')
        thumbnail = thumb_el.text if thumb_el is not None else None

        return {
            'id': video_id,
            # Expected by _TEST above; previously never returned.
            'display_id': display_id,
            'title': video_title,
            'description': video_description,
            'thumbnail': thumbnail,
            'formats': formats,
            'age_limit': age_limit,
        }
@ -0,0 +1,84 @@ | |||||
from __future__ import unicode_literals | |||||
import re | |||||
from .common import InfoExtractor | |||||
from ..utils import ( | |||||
parse_duration, | |||||
fix_xml_ampersands, | |||||
) | |||||
class TNAFlixIE(InfoExtractor):
    """Extract video metadata and format URLs from tnaflix.com video pages.

    Subclasses can adapt the scraping by overriding ``_VALID_URL`` and the
    ``_TITLE_REGEX`` / ``_DESCRIPTION_REGEX`` / ``_CONFIG_REGEX`` attributes.
    """

    _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/(?P<cat_id>[\w-]+)/(?P<display_id>[\w-]+)/video(?P<id>\d+)'

    # When None, the title is taken via self._og_search_title() instead.
    _TITLE_REGEX = None
    _DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>'
    # Captures the URL passed to escape() in the `flashvars.config` assignment.
    _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'

    _TEST = {
        'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
        'md5': 'ecf3498417d09216374fc5907f9c6ec0',
        'info_dict': {
            'id': '553878',
            'display_id': 'Carmella-Decesare-striptease',
            'ext': 'mp4',
            'title': 'Carmella Decesare - striptease',
            'description': '',
            # Raw string: `\.` is a regex escape, not a string escape.
            'thumbnail': r're:https?://.*\.jpg$',
            'duration': 91,
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video page at *url*.

        The page is scraped for title, description, duration and the URL of
        an XML config document; that document supplies the thumbnail and one
        format entry per ``./quality/item`` element.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, display_id)

        if self._TITLE_REGEX:
            title = self._html_search_regex(
                self._TITLE_REGEX, webpage, 'title')
        else:
            title = self._og_search_title(webpage)
        description = self._html_search_regex(
            self._DESCRIPTION_REGEX, webpage, 'description', fatal=False, default='')

        age_limit = self._rta_search(webpage)

        duration = self._html_search_meta('duration', webpage, 'duration', default=None)
        if duration:
            # The first character is dropped before parsing (presumably a
            # leading 'T' of an ISO-8601-style value -- TODO confirm).
            duration = parse_duration(duration[1:])

        cfg_url = self._html_search_regex(
            self._CONFIG_REGEX, webpage, 'flashvars.config')

        cfg_xml = self._download_xml(
            cfg_url, display_id, note='Downloading metadata',
            transform_source=fix_xml_ampersands)

        # The thumbnail is optional metadata: a config document without a
        # <startThumb> element must not abort the whole extraction.
        thumb_el = cfg_xml.find('./startThumb')
        thumbnail = thumb_el.text if thumb_el is not None else None

        formats = []
        for item in cfg_xml.findall('./quality/item'):
            link_el = item.find('videoLink')
            if link_el is None or not link_el.text:
                # An item without a link cannot be downloaded; skip it.
                continue
            # Blank out the numeric `speed=` value in the media URL
            # (presumably lifts a server-side rate limit -- TODO confirm).
            video_url = re.sub(r'speed=\d+', 'speed=', link_el.text)

            res_el = item.find('res')
            format_id = res_el.text if res_el is not None else None
            fmt = {
                'url': video_url,
                'format_id': format_id,
            }
            # Leading digits of the resolution label are used as the height.
            m = re.search(r'^(\d+)', format_id) if format_id else None
            if m:
                fmt['height'] = int(m.group(1))
            formats.append(fmt)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'age_limit': age_limit,
            'formats': formats,
        }