You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

97 lines
3.4 KiB

10 years ago
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. int_or_none,
  7. remove_end,
  8. remove_start,
  9. str_to_int,
  10. unified_strdate,
  11. )
  12. class PinkbikeIE(InfoExtractor):
  13. _VALID_URL = r'https?://(?:(?:www\.)?pinkbike\.com/video/|es\.pinkbike\.org/i/kvid/kvid-y5\.swf\?id=)(?P<id>[0-9]+)'
  14. _TESTS = [{
  15. 'url': 'http://www.pinkbike.com/video/402811/',
  16. 'md5': '4814b8ca7651034cd87e3361d5c2155a',
  17. 'info_dict': {
  18. 'id': '402811',
  19. 'ext': 'mp4',
  20. 'title': 'Brandon Semenuk - RAW 100',
  21. 'description': 'Official release: www.redbull.ca/rupertwalker',
  22. 'thumbnail': r're:^https?://.*\.jpg$',
  23. 'duration': 100,
  24. 'upload_date': '20150406',
  25. 'uploader': 'revelco',
  26. 'location': 'Victoria, British Columbia, Canada',
  27. 'view_count': int,
  28. 'comment_count': int,
  29. }
  30. }, {
  31. 'url': 'http://es.pinkbike.org/i/kvid/kvid-y5.swf?id=406629',
  32. 'only_matching': True,
  33. }]
  34. def _real_extract(self, url):
  35. video_id = self._match_id(url)
  36. webpage = self._download_webpage(
  37. 'http://www.pinkbike.com/video/%s' % video_id, video_id)
  38. formats = []
  39. for _, format_id, src in re.findall(
  40. r'data-quality=((?:\\)?["\'])(.+?)\1[^>]+src=\1(.+?)\1', webpage):
  41. height = int_or_none(self._search_regex(
  42. r'^(\d+)[pP]$', format_id, 'height', default=None))
  43. formats.append({
  44. 'url': src,
  45. 'format_id': format_id,
  46. 'height': height,
  47. })
  48. self._sort_formats(formats)
  49. title = remove_end(self._og_search_title(webpage), ' Video - Pinkbike')
  50. description = self._html_search_regex(
  51. r'(?s)id="media-description"[^>]*>(.+?)<',
  52. webpage, 'description', default=None) or remove_start(
  53. self._og_search_description(webpage), title + '. ')
  54. thumbnail = self._og_search_thumbnail(webpage)
  55. duration = int_or_none(self._html_search_meta(
  56. 'video:duration', webpage, 'duration'))
  57. uploader = self._search_regex(
  58. r'<a[^>]+\brel=["\']author[^>]+>([^<]+)', webpage,
  59. 'uploader', fatal=False)
  60. upload_date = unified_strdate(self._search_regex(
  61. r'class="fullTime"[^>]+title="([^"]+)"',
  62. webpage, 'upload date', fatal=False))
  63. location = self._html_search_regex(
  64. r'(?s)<dt>Location</dt>\s*<dd>(.+?)<',
  65. webpage, 'location', fatal=False)
  66. def extract_count(webpage, label):
  67. return str_to_int(self._search_regex(
  68. r'<span[^>]+class="stat-num"[^>]*>([\d,.]+)</span>\s*<span[^>]+class="stat-label"[^>]*>%s' % label,
  69. webpage, label, fatal=False))
  70. view_count = extract_count(webpage, 'Views')
  71. comment_count = extract_count(webpage, 'Comments')
  72. return {
  73. 'id': video_id,
  74. 'title': title,
  75. 'description': description,
  76. 'thumbnail': thumbnail,
  77. 'duration': duration,
  78. 'upload_date': upload_date,
  79. 'uploader': uploader,
  80. 'location': location,
  81. 'view_count': view_count,
  82. 'comment_count': comment_count,
  83. 'formats': formats
  84. }