You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

116 lines
4.3 KiB

  1. import json
  2. import os
  3. import re
  4. import sys
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. compat_urllib_parse_urlparse,
  8. compat_urllib_request,
  9. ExtractorError,
  10. unescapeHTML,
  11. unified_strdate,
  12. )
  13. from ..aes import (
  14. aes_decrypt_text
  15. )
  16. class YouPornIE(InfoExtractor):
  17. _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
  18. _TEST = {
  19. u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
  20. u'file': u'505835.mp4',
  21. u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89',
  22. u'info_dict': {
  23. u"upload_date": u"20101221",
  24. u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",
  25. u"uploader": u"Ask Dan And Jennifer",
  26. u"title": u"Sex Ed: Is It Safe To Masturbate Daily?",
  27. u"age_limit": 18,
  28. }
  29. }
  30. def _real_extract(self, url):
  31. mobj = re.match(self._VALID_URL, url)
  32. video_id = mobj.group('videoid')
  33. url = 'http://www.' + mobj.group('url')
  34. req = compat_urllib_request.Request(url)
  35. req.add_header('Cookie', 'age_verified=1')
  36. webpage = self._download_webpage(req, video_id)
  37. age_limit = self._rta_search(webpage)
  38. # Get JSON parameters
  39. json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
  40. try:
  41. params = json.loads(json_params)
  42. except:
  43. raise ExtractorError(u'Invalid JSON')
  44. self.report_extraction(video_id)
  45. try:
  46. video_title = params['title']
  47. upload_date = unified_strdate(params['release_date_f'])
  48. video_description = params['description']
  49. video_uploader = params['submitted_by']
  50. thumbnail = params['thumbnails'][0]['image']
  51. except KeyError:
  52. raise ExtractorError('Missing JSON parameter: ' + sys.exc_info()[1])
  53. # Get all of the links from the page
  54. DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
  55. download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
  56. webpage, u'download list').strip()
  57. LINK_RE = r'<a href="([^"]+)">'
  58. links = re.findall(LINK_RE, download_list_html)
  59. # Get all encrypted links
  60. encrypted_links = re.findall(r'var encryptedQuality[0-9]{3}URL = \'([a-zA-Z0-9+/]+={0,2})\';', webpage)
  61. for encrypted_link in encrypted_links:
  62. link = aes_decrypt_text(encrypted_link, video_title, 32).decode('utf-8')
  63. links.append(link)
  64. if not links:
  65. raise ExtractorError(u'ERROR: no known formats available for video')
  66. formats = []
  67. for link in links:
  68. # A link looks like this:
  69. # http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
  70. # A path looks like this:
  71. # /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
  72. video_url = unescapeHTML(link)
  73. path = compat_urllib_parse_urlparse(video_url).path
  74. extension = os.path.splitext(path)[1][1:]
  75. format = path.split('/')[4].split('_')[:2]
  76. # size = format[0]
  77. # bitrate = format[1]
  78. format = "-".join(format)
  79. # title = u'%s-%s-%s' % (video_title, size, bitrate)
  80. formats.append({
  81. 'url': video_url,
  82. 'ext': extension,
  83. 'format': format,
  84. 'format_id': format,
  85. })
  86. # Sort and remove doubles
  87. formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-'))))
  88. for i in range(len(formats)-1,0,-1):
  89. if formats[i]['format_id'] == formats[i-1]['format_id']:
  90. del formats[i]
  91. return {
  92. 'id': video_id,
  93. 'uploader': video_uploader,
  94. 'upload_date': upload_date,
  95. 'title': video_title,
  96. 'thumbnail': thumbnail,
  97. 'description': video_description,
  98. 'age_limit': age_limit,
  99. 'formats': formats,
  100. }