You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

39 lines
1.2 KiB

  1. from __future__ import unicode_literals
  2. from .common import InfoExtractor
  class YouJizzIE(InfoExtractor):
      """Information extractor for youjizz.com video pages.

      ``_VALID_URL`` accepts ``/videos/<optional slug>-<digits>.html`` on
      youjizz.com, optionally preceded by one subdomain label, and captures
      the digits as the ``id`` group.
      """

      _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]+)?-(?P<id>[0-9]+)\.html(?:$|[?#])'

      _TESTS = [
          {
              'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
              'md5': '78fc1901148284c69af12640e01c6310',
              'info_dict': {
                  'id': '2189178',
                  'ext': 'mp4',
                  'title': 'Zeichentrick 1',
                  'age_limit': 18,
              },
          },
          {
              # Same video id, but with nothing before the dash in the path.
              'url': 'http://www.youjizz.com/videos/-2189178.html',
              'only_matching': True,
          },
      ]

      def _real_extract(self, url):
          """Build and return the info dict for the video page at *url*.

          The page is downloaded once. The result is the first entry
          returned by ``_parse_html5_media_entries``, with its ``id``,
          ``title`` and ``age_limit`` keys overwritten by the values
          gathered here.
          """
          video_id = self._match_id(url)

          # YouJizz's HTML5 player has invalid HTML: put a space between a
          # double quote and a directly following ``controls`` before the
          # markup is handed to the media-entry parser.
          page = self._download_webpage(url, video_id).replace(
              '"controls', '" controls')

          rating = self._rta_search(page)
          title = self._html_search_regex(
              r'<title>\s*(.*)\s*</title>', page, 'title')

          # Only the first media entry found on the page is used.
          entry = self._parse_html5_media_entries(url, page, video_id)[0]
          entry['id'] = video_id
          entry['title'] = title
          entry['age_limit'] = rating
          return entry