You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

49 lines
1.6 KiB

  1. import os
  2. import re
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. compat_urllib_parse_urlparse,
  6. compat_urllib_request,
  7. compat_urllib_parse,
  8. )
  9. class MofosexIE(InfoExtractor):
  10. _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>mofosex\.com/videos/(?P<videoid>[0-9]+)/.*?\.html)'
  11. _TEST = {
  12. u'url': u'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
  13. u'file': u'5018.mp4',
  14. u'md5': u'1b2eb47ac33cc75d4a80e3026b613c5a',
  15. u'info_dict': {
  16. u"title": u"Japanese Teen Music Video",
  17. u"age_limit": 18,
  18. }
  19. }
  20. def _real_extract(self, url):
  21. mobj = re.match(self._VALID_URL, url)
  22. video_id = mobj.group('videoid')
  23. url = 'http://www.' + mobj.group('url')
  24. req = compat_urllib_request.Request(url)
  25. req.add_header('Cookie', 'age_verified=1')
  26. webpage = self._download_webpage(req, video_id)
  27. video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, u'title')
  28. video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, u'video_url'))
  29. path = compat_urllib_parse_urlparse(video_url).path
  30. extension = os.path.splitext(path)[1][1:]
  31. format = path.split('/')[5].split('_')[:2]
  32. format = "-".join(format)
  33. age_limit = self._rta_search(webpage)
  34. return {
  35. 'id': video_id,
  36. 'title': video_title,
  37. 'url': video_url,
  38. 'ext': extension,
  39. 'format': format,
  40. 'format_id': format,
  41. 'age_limit': age_limit,
  42. }