You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

142 lines
5.3 KiB

  1. import json
  2. import os
  3. import re
  4. import sys
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. compat_str,
  8. compat_urllib_parse_urlparse,
  9. compat_urllib_request,
  10. ExtractorError,
  11. unescapeHTML,
  12. unified_strdate,
  13. )
  14. from ..aes import (
  15. aes_decrypt_text
  16. )
  class YouPornIE(InfoExtractor):
      """Extract downloadable video formats from youporn.com watch pages.

      The watch page embeds the video metadata as a JSON argument to
      ``new Video(...)``, lists direct download links inside a
      ``<ul class="downloadList">`` element, and carries one additional
      encrypted URL that is decrypted through ``aes_decrypt_text`` using the
      video title.
      """

      _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'
      _TEST = {
          u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
          u'file': u'505835.mp4',
          u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89',
          u'info_dict': {
              u"upload_date": u"20101221",
              u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",
              u"uploader": u"Ask Dan And Jennifer",
              u"title": u"Sex Ed: Is It Safe To Masturbate Daily?"
          }
      }

      def _print_formats(self, formats):
          """Print a two-column table (extension, format name) of *formats*.

          Each entry of *formats* must be a dict with 'ext' and 'format' keys.
          """
          print(u'Available formats:')
          print(u'ext\t\tformat')
          print(u'---------------------------------')
          for fmt in formats:
              print(u'%s\t\t%s' % (fmt['ext'], fmt['format']))

      def _specific(self, req_format, formats):
          """Return the first entry of *formats* named *req_format*, else None."""
          for fmt in formats:
              if fmt["format"] == req_format:
                  return fmt
          return None

      def _real_extract(self, url):
          """Download the watch page at *url* and build its format list.

          Returns a list of info dicts selected according to the downloader's
          'format' parameter ('best'/None -> first entry, 'worst' -> last
          entry, '-1'/'all' -> every entry, anything else -> the entry with
          that exact format name).  Returns None after printing the format
          table when the downloader's 'listformats' parameter is set.

          Raises ExtractorError when the URL does not match, the embedded JSON
          is invalid or incomplete, no usable link is found, or the requested
          format does not exist.
          """
          mobj = re.match(self._VALID_URL, url)
          if mobj is None:
              raise ExtractorError(u'Invalid URL: %s' % url)
          video_id = mobj.group('videoid')

          # The page is only served in full once the age check cookie is set.
          req = compat_urllib_request.Request(url)
          req.add_header('Cookie', 'age_verified=1')
          webpage = self._download_webpage(req, video_id)

          # Get JSON parameters
          json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
          try:
              params = json.loads(json_params)
          except ValueError:
              # json.loads signals malformed input with ValueError (or a
              # subclass); anything else should propagate untouched.
              raise ExtractorError(u'Invalid JSON')

          self.report_extraction(video_id)
          try:
              video_title = params['title']
              upload_date = unified_strdate(params['release_date_f'])
              video_description = params['description']
              video_uploader = params['submitted_by']
              thumbnail = params['thumbnails'][0]['image']
          except (KeyError, IndexError, TypeError):
              # IndexError/TypeError cover an empty or malformed 'thumbnails'
              # entry.  The exception must be converted to text explicitly:
              # concatenating it to a string would itself raise TypeError.
              raise ExtractorError(
                  u'Missing JSON parameter: %s' % compat_str(sys.exc_info()[1]))

          # Get all of the formats available
          DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
          download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
              webpage, u'download list').strip()

          # Get all of the links from the page
          LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
          links = re.findall(LINK_RE, download_list_html)

          # Get link of hd video
          encrypted_video_url = self._html_search_regex(
              r'var encrypted(?:Quality[0-9]+)?URL = \'(?P<encrypted_video_url>[a-zA-Z0-9+/]+={0,2})\';',
              webpage, u'encrypted_video_url')
          video_url = aes_decrypt_text(encrypted_video_url, video_title, 32)
          if not isinstance(video_url, compat_str):
              # Explicit check instead of `assert`, which vanishes under -O.
              raise ExtractorError(u'Decrypted video URL is not a text string')
          url_parts = video_url.split('/')
          # Segment 6 of the full URL is the "<size>_<bitrate>_<id>" directory.
          # Only add the decrypted link if it is 720p, to avoid duplicates.
          if len(url_parts) > 6 and url_parts[6].split('_')[0] == u'720p':
              links = [video_url] + links

          if not links:
              raise ExtractorError(u'ERROR: no known formats available for video')

          self.to_screen(u'Links found: %d' % len(links))

          formats = []
          for link in links:
              # A link looks like this:
              # http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
              # A path looks like this:
              # /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
              video_url = unescapeHTML(link)
              path = compat_urllib_parse_urlparse(video_url).path
              extension = os.path.splitext(path)[1][1:]
              path_parts = path.split('/')
              if len(path_parts) <= 4:
                  # Not the expected CDN layout; skip this link rather than
                  # aborting the whole extraction with an IndexError.
                  self.to_screen(u'Skipping link with unexpected path: %s' % path)
                  continue
              # "480p_370k_8004515" -> "480p-370k"
              format_name = "-".join(path_parts[4].split('_')[:2])
              formats.append({
                  'id': video_id,
                  'url': video_url,
                  'uploader': video_uploader,
                  'upload_date': upload_date,
                  'title': video_title,
                  'ext': extension,
                  'format': format_name,
                  'thumbnail': thumbnail,
                  'description': video_description
              })

          if not formats:
              raise ExtractorError(u'ERROR: no known formats available for video')

          if self._downloader.params.get('listformats', None):
              self._print_formats(formats)
              return

          req_format = self._downloader.params.get('format', 'best')
          self.to_screen(u'Format: %s' % req_format)

          if req_format is None or req_format == 'best':
              return [formats[0]]
          elif req_format == 'worst':
              return [formats[-1]]
          elif req_format in ('-1', 'all'):
              return formats
          else:
              selected = self._specific(req_format, formats)
              if selected is None:
                  raise ExtractorError(u'Requested format not available')
              return [selected]