You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

101 lines
3.2 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. int_or_none,
  6. unified_timestamp,
  7. )
  8. class Zaq1IE(InfoExtractor):
  9. _VALID_URL = r'https?://(?:www\.)?zaq1\.pl/video/(?P<id>[^/?#&]+)'
  10. _TESTS = [{
  11. 'url': 'http://zaq1.pl/video/xev0e',
  12. 'md5': '24a5eb3f052e604ae597c4d0d19b351e',
  13. 'info_dict': {
  14. 'id': 'xev0e',
  15. 'title': 'DJ NA WESELE. TANIEC Z FIGURAMI.węgrów/sokołów podlaski/siedlce/mińsk mazowiecki/warszawa',
  16. 'description': 'www.facebook.com/weseledjKontakt: 728 448 199 / 505 419 147',
  17. 'ext': 'mp4',
  18. 'duration': 511,
  19. 'timestamp': 1490896361,
  20. 'uploader': 'Anonim',
  21. 'upload_date': '20170330',
  22. 'view_count': int,
  23. }
  24. }, {
  25. # malformed JSON-LD
  26. 'url': 'http://zaq1.pl/video/x81vn',
  27. 'info_dict': {
  28. 'id': 'x81vn',
  29. 'title': 'SEKRETNE ŻYCIE WALTERA MITTY',
  30. 'ext': 'mp4',
  31. 'duration': 6234,
  32. 'timestamp': 1493494860,
  33. 'uploader': 'Anonim',
  34. 'upload_date': '20170429',
  35. 'view_count': int,
  36. },
  37. 'params': {
  38. 'skip_download': True,
  39. },
  40. 'expected_warnings': ['Failed to parse JSON'],
  41. }]
  42. def _real_extract(self, url):
  43. video_id = self._match_id(url)
  44. webpage = self._download_webpage(url, video_id)
  45. video_url = self._search_regex(
  46. r'data-video-url=(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
  47. 'video url', group='url')
  48. info = self._search_json_ld(webpage, video_id, fatal=False)
  49. def extract_data(field, name, fatal=False):
  50. return self._search_regex(
  51. r'data-%s=(["\'])(?P<field>(?:(?!\1).)+)\1' % field,
  52. webpage, field, fatal=fatal, group='field')
  53. if not info.get('title'):
  54. info['title'] = extract_data('file-name', 'title', fatal=True)
  55. if not info.get('duration'):
  56. info['duration'] = int_or_none(extract_data('duration', 'duration'))
  57. if not info.get('thumbnail'):
  58. info['thumbnail'] = extract_data('photo-url', 'thumbnail')
  59. if not info.get('timestamp'):
  60. info['timestamp'] = unified_timestamp(self._html_search_meta(
  61. 'uploadDate', webpage, 'timestamp'))
  62. if not info.get('interactionCount'):
  63. info['view_count'] = int_or_none(self._html_search_meta(
  64. 'interactionCount', webpage, 'view count'))
  65. uploader = self._html_search_regex(
  66. r'Wideo dodał:\s*<a[^>]*>([^<]+)</a>', webpage, 'uploader',
  67. fatal=False)
  68. width = int_or_none(self._html_search_meta(
  69. 'width', webpage, fatal=False))
  70. height = int_or_none(self._html_search_meta(
  71. 'height', webpage, fatal=False))
  72. info.update({
  73. 'id': video_id,
  74. 'formats': [{
  75. 'url': video_url,
  76. 'width': width,
  77. 'height': height,
  78. 'http_headers': {
  79. 'Referer': url,
  80. },
  81. }],
  82. 'uploader': uploader,
  83. })
  84. return info