You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

92 lines
3.0 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. class CloserToTruthIE(InfoExtractor):
  6. _VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
  7. _TESTS = [{
  8. 'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688',
  9. 'info_dict': {
  10. 'id': '0_zof1ktre',
  11. 'display_id': 'solutions-the-mind-body-problem',
  12. 'ext': 'mov',
  13. 'title': 'Solutions to the Mind-Body Problem?',
  14. 'upload_date': '20140221',
  15. 'timestamp': 1392956007,
  16. 'uploader_id': 'CTTXML'
  17. },
  18. 'params': {
  19. 'skip_download': True,
  20. },
  21. }, {
  22. 'url': 'http://closertotruth.com/episodes/how-do-brains-work',
  23. 'info_dict': {
  24. 'id': '0_iuxai6g6',
  25. 'display_id': 'how-do-brains-work',
  26. 'ext': 'mov',
  27. 'title': 'How do Brains Work?',
  28. 'upload_date': '20140221',
  29. 'timestamp': 1392956024,
  30. 'uploader_id': 'CTTXML'
  31. },
  32. 'params': {
  33. 'skip_download': True,
  34. },
  35. }, {
  36. 'url': 'http://closertotruth.com/interviews/1725',
  37. 'info_dict': {
  38. 'id': '1725',
  39. 'title': 'AyaFr-002',
  40. },
  41. 'playlist_mincount': 2,
  42. }]
  43. def _real_extract(self, url):
  44. display_id = self._match_id(url)
  45. webpage = self._download_webpage(url, display_id)
  46. partner_id = self._search_regex(
  47. r'<script[^>]+src=["\'].*?\b(?:partner_id|p)/(\d+)',
  48. webpage, 'kaltura partner_id')
  49. title = self._search_regex(
  50. r'<title>(.+?)\s*\|\s*.+?</title>', webpage, 'video title')
  51. select = self._search_regex(
  52. r'(?s)<select[^>]+id="select-version"[^>]*>(.+?)</select>',
  53. webpage, 'select version', default=None)
  54. if select:
  55. entry_ids = set()
  56. entries = []
  57. for mobj in re.finditer(
  58. r'<option[^>]+value=(["\'])(?P<id>[0-9a-z_]+)(?:#.+?)?\1[^>]*>(?P<title>[^<]+)',
  59. webpage):
  60. entry_id = mobj.group('id')
  61. if entry_id in entry_ids:
  62. continue
  63. entry_ids.add(entry_id)
  64. entries.append({
  65. '_type': 'url_transparent',
  66. 'url': 'kaltura:%s:%s' % (partner_id, entry_id),
  67. 'ie_key': 'Kaltura',
  68. 'title': mobj.group('title'),
  69. })
  70. if entries:
  71. return self.playlist_result(entries, display_id, title)
  72. entry_id = self._search_regex(
  73. r'<a[^>]+id=(["\'])embed-kaltura\1[^>]+data-kaltura=(["\'])(?P<id>[0-9a-z_]+)\2',
  74. webpage, 'kaltura entry_id', group='id')
  75. return {
  76. '_type': 'url_transparent',
  77. 'display_id': display_id,
  78. 'url': 'kaltura:%s:%s' % (partner_id, entry_id),
  79. 'ie_key': 'Kaltura',
  80. 'title': title
  81. }