You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

96 lines
3.3 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. import random
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. int_or_none,
  8. float_or_none,
  9. unified_strdate,
  10. )
  11. class PornoVoisinesIE(InfoExtractor):
  12. _VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/showvideo/(?P<id>\d+)/(?P<display_id>[^/]+)'
  13. _VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \
  14. '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4'
  15. _SERVER_NUMBERS = (1, 2)
  16. _TEST = {
  17. 'url': 'http://www.pornovoisines.com/showvideo/1285/recherche-appartement/',
  18. 'md5': '5ac670803bc12e9e7f9f662ce64cf1d1',
  19. 'info_dict': {
  20. 'id': '1285',
  21. 'display_id': 'recherche-appartement',
  22. 'ext': 'mp4',
  23. 'title': 'Recherche appartement',
  24. 'description': 'md5:819ea0b785e2a04667a1a01cdc89594e',
  25. 'thumbnail': 're:^https?://.*\.jpg$',
  26. 'upload_date': '20140925',
  27. 'duration': 120,
  28. 'view_count': int,
  29. 'average_rating': float,
  30. 'categories': ['Débutantes', 'Scénario', 'Sodomie'],
  31. 'age_limit': 18,
  32. }
  33. }
  34. @classmethod
  35. def build_video_url(cls, num):
  36. return cls._VIDEO_URL_TEMPLATE % (random.choice(cls._SERVER_NUMBERS), num)
  37. def _real_extract(self, url):
  38. mobj = re.match(self._VALID_URL, url)
  39. video_id = mobj.group('id')
  40. display_id = mobj.group('display_id')
  41. webpage = self._download_webpage(url, video_id)
  42. video_url = self.build_video_url(video_id)
  43. title = self._html_search_regex(
  44. r'<h1>(.+?)</h1>', webpage, 'title', flags=re.DOTALL)
  45. description = self._html_search_regex(
  46. r'<article id="descriptif">(.+?)</article>',
  47. webpage, 'description', fatal=False, flags=re.DOTALL)
  48. thumbnail = self._search_regex(
  49. r'<div id="mediaspace%s">\s*<img src="/?([^"]+)"' % video_id,
  50. webpage, 'thumbnail', fatal=False)
  51. if thumbnail:
  52. thumbnail = 'http://www.pornovoisines.com/%s' % thumbnail
  53. upload_date = unified_strdate(self._search_regex(
  54. r'Publié le ([\d-]+)', webpage, 'upload date', fatal=False))
  55. duration = int_or_none(self._search_regex(
  56. 'Durée (\d+)', webpage, 'duration', fatal=False))
  57. view_count = int_or_none(self._search_regex(
  58. r'(\d+) vues', webpage, 'view count', fatal=False))
  59. average_rating = self._search_regex(
  60. r'Note\s*:\s*(\d+(?:,\d+)?)', webpage, 'average rating', fatal=False)
  61. if average_rating:
  62. average_rating = float_or_none(average_rating.replace(',', '.'))
  63. categories = self._html_search_meta(
  64. 'keywords', webpage, 'categories', fatal=False)
  65. if categories:
  66. categories = [category.strip() for category in categories.split(',')]
  67. return {
  68. 'id': video_id,
  69. 'display_id': display_id,
  70. 'url': video_url,
  71. 'title': title,
  72. 'description': description,
  73. 'thumbnail': thumbnail,
  74. 'upload_date': upload_date,
  75. 'duration': duration,
  76. 'view_count': view_count,
  77. 'average_rating': average_rating,
  78. 'categories': categories,
  79. 'age_limit': 18,
  80. }