You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

75 lines
2.4 KiB

  1. from __future__ import unicode_literals
  2. import re
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. js_to_json,
  6. remove_end,
  7. determine_ext,
  8. )
  9. class HellPornoIE(InfoExtractor):
  10. _VALID_URL = r'https?://(?:www\.)?hellporno\.(?:com/videos|net/v)/(?P<id>[^/]+)'
  11. _TESTS = [{
  12. 'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/',
  13. 'md5': '1fee339c610d2049699ef2aa699439f1',
  14. 'info_dict': {
  15. 'id': '149116',
  16. 'display_id': 'dixie-is-posing-with-naked-ass-very-erotic',
  17. 'ext': 'mp4',
  18. 'title': 'Dixie is posing with naked ass very erotic',
  19. 'thumbnail': r're:https?://.*\.jpg$',
  20. 'age_limit': 18,
  21. }
  22. }, {
  23. 'url': 'http://hellporno.net/v/186271/',
  24. 'only_matching': True,
  25. }]
  26. def _real_extract(self, url):
  27. display_id = self._match_id(url)
  28. webpage = self._download_webpage(url, display_id)
  29. title = remove_end(self._html_search_regex(
  30. r'<title>([^<]+)</title>', webpage, 'title'), ' - Hell Porno')
  31. flashvars = self._parse_json(self._search_regex(
  32. r'var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'),
  33. display_id, transform_source=js_to_json)
  34. video_id = flashvars.get('video_id')
  35. thumbnail = flashvars.get('preview_url')
  36. ext = determine_ext(flashvars.get('postfix'), 'mp4')
  37. formats = []
  38. for video_url_key in ['video_url', 'video_alt_url']:
  39. video_url = flashvars.get(video_url_key)
  40. if not video_url:
  41. continue
  42. video_text = flashvars.get('%s_text' % video_url_key)
  43. fmt = {
  44. 'url': video_url,
  45. 'ext': ext,
  46. 'format_id': video_text,
  47. }
  48. m = re.search(r'^(?P<height>\d+)[pP]', video_text)
  49. if m:
  50. fmt['height'] = int(m.group('height'))
  51. formats.append(fmt)
  52. self._sort_formats(formats)
  53. categories = self._html_search_meta(
  54. 'keywords', webpage, 'categories', default='').split(',')
  55. return {
  56. 'id': video_id,
  57. 'display_id': display_id,
  58. 'title': title,
  59. 'thumbnail': thumbnail,
  60. 'categories': categories,
  61. 'age_limit': 18,
  62. 'formats': formats,
  63. }