You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

84 lines
2.8 KiB

  1. from __future__ import unicode_literals
  2. from .common import InfoExtractor
  3. from ..compat import compat_urlparse
  4. from ..utils import (
  5. int_or_none,
  6. js_to_json,
  7. remove_end,
  8. unified_strdate,
  9. )
  class VidbitIE(InfoExtractor):
      """Extractor for vidbit.co video pages.

      Both ``/watch?v=<id>`` and ``/embed?v=<id>`` URLs are accepted; the
      ``v`` query parameter is captured as the video id.
      """

      _VALID_URL = r'https?://(?:www\.)?vidbit\.co/(?:watch|embed)\?.*?\bv=(?P<id>[\da-zA-Z]+)'
      _TESTS = [{
          'url': 'http://www.vidbit.co/watch?v=jkL2yDOEq2',
          'md5': '1a34b7f14defe3b8fafca9796892924d',
          'info_dict': {
              'id': 'jkL2yDOEq2',
              'ext': 'mp4',
              'title': 'Intro to VidBit',
              'description': 'md5:5e0d6142eec00b766cbf114bfd3d16b7',
              'thumbnail': r're:https?://.*\.jpg$',
              'upload_date': '20160618',
              'view_count': int,
              'comment_count': int,
          }
      }, {
          'url': 'http://www.vidbit.co/embed?v=jkL2yDOEq2&auto=0&water=0',
          'only_matching': True,
      }]

      def _real_extract(self, url):
          """Build the info dict for the video addressed by *url*.

          The watch page is downloaded and scanned for the object literal
          passed to a ``.setup(...)`` call (presumably the player
          configuration -- confirm against a live page).  Its ``file`` and
          ``title`` entries are preferred; when they are missing, regex and
          HTML fallbacks are used instead.

          Returns a dict with the keys ``id``, ``url``, ``title``,
          ``description``, ``thumbnail``, ``upload_date``, ``view_count`` and
          ``comment_count``.
          """
          video_id = self._match_id(url)

          # Always fetch the /watch page, even when an /embed URL was given,
          # so the metadata lookups below run against the same markup.
          webpage = self._download_webpage(
              compat_urlparse.urljoin(url, '/watch?v=%s' % video_id), video_id)

          video_url, title = None, None

          # The setup blob is only the preferred source: parse it non-fatally
          # so that a malformed blob falls through to the fallbacks below
          # instead of aborting the whole extraction.
          config = self._parse_json(
              self._search_regex(
                  r'(?s)\.setup\(({.+?})\);', webpage, 'setup', default='{}'),
              video_id, transform_source=js_to_json, fatal=False)
          if config:
              if config.get('file'):
                  # 'file' may be relative; resolve it against the page URL.
                  video_url = compat_urlparse.urljoin(url, config['file'])
              title = config.get('title')

          if not video_url:
              # Fallback: a bare `file: "<url>"` assignment anywhere in the page.
              video_url = compat_urlparse.urljoin(url, self._search_regex(
                  r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
                  webpage, 'video URL', group='url'))

          if not title:
              # Fallback chain: <h1>, then <title>, then the Open Graph title;
              # a trailing ' - VidBit' suffix is stripped from the result.
              title = remove_end(
                  self._html_search_regex(
                      (r'<h1>(.+?)</h1>', r'<title>(.+?)</title>'),
                      webpage, 'title', default=None) or self._og_search_title(webpage),
                  ' - VidBit')

          description = self._html_search_meta(
              ('description', 'og:description', 'twitter:description'),
              webpage, 'description')

          upload_date = unified_strdate(self._html_search_meta(
              'datePublished', webpage, 'upload date'))

          # The counters are optional: both lookups are non-fatal and
          # int_or_none() wraps whatever they return.
          view_count = int_or_none(self._search_regex(
              r'<strong>(\d+)</strong> views',
              webpage, 'view count', fatal=False))

          comment_count = int_or_none(self._search_regex(
              r'id=["\']cmt_num["\'][^>]*>\((\d+)\)',
              webpage, 'comment count', fatal=False))

          return {
              'id': video_id,
              'url': video_url,
              'title': title,
              'description': description,
              'thumbnail': self._og_search_thumbnail(webpage),
              'upload_date': upload_date,
              'view_count': view_count,
              'comment_count': comment_count,
          }