You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

102 lines
3.7 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..utils import int_or_none
  5. class WebOfStoriesIE(InfoExtractor):
  6. _VALID_URL = r'https?://(?:www\.)?webofstories\.com/play/(?:[^/]+/)?(?P<id>[0-9]+)'
  7. _VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/'
  8. _GREAT_LIFE_STREAMER = 'rtmp://eu-cdn1.webofstories.com/cfx/st/'
  9. _USER_STREAMER = 'rtmp://eu-users.webofstories.com/cfx/st/'
  10. _TESTS = [
  11. {
  12. 'url': 'http://www.webofstories.com/play/hans.bethe/71',
  13. 'md5': '373e4dd915f60cfe3116322642ddf364',
  14. 'info_dict': {
  15. 'id': '4536',
  16. 'ext': 'mp4',
  17. 'title': 'The temperature of the sun',
  18. 'thumbnail': 're:^https?://.*\.jpg$',
  19. 'description': 'Hans Bethe talks about calculating the temperature of the sun',
  20. 'duration': 238,
  21. }
  22. },
  23. {
  24. 'url': 'http://www.webofstories.com/play/55908',
  25. 'md5': '2985a698e1fe3211022422c4b5ed962c',
  26. 'info_dict': {
  27. 'id': '55908',
  28. 'ext': 'mp4',
  29. 'title': 'The story of Gemmata obscuriglobus',
  30. 'thumbnail': 're:^https?://.*\.jpg$',
  31. 'description': 'Planctomycete talks about The story of Gemmata obscuriglobus',
  32. 'duration': 169,
  33. }
  34. },
  35. ]
  36. def _real_extract(self, url):
  37. video_id = self._match_id(url)
  38. webpage = self._download_webpage(url, video_id)
  39. title = self._og_search_title(webpage)
  40. description = self._html_search_meta('description', webpage)
  41. thumbnail = self._og_search_thumbnail(webpage)
  42. story_filename = self._search_regex(
  43. r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename')
  44. speaker_id = self._search_regex(
  45. r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID')
  46. story_id = self._search_regex(
  47. r'\.storyId\((\d+)\)', webpage, 'story ID')
  48. speaker_type = self._search_regex(
  49. r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type')
  50. great_life = self._search_regex(
  51. r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story')
  52. is_great_life_series = great_life == 'true'
  53. duration = int_or_none(self._search_regex(
  54. r'\.duration\((\d+)\)', webpage, 'duration', fatal=False))
  55. # URL building, see: http://www.webofstories.com/scripts/player.js
  56. ms_prefix = ''
  57. if speaker_type.lower() == 'ms':
  58. ms_prefix = 'mini_sites/'
  59. if is_great_life_series:
  60. mp4_url = '{0:}lives/{1:}/{2:}.mp4'.format(
  61. self._VIDEO_DOMAIN, speaker_id, story_filename)
  62. rtmp_ext = 'flv'
  63. streamer = self._GREAT_LIFE_STREAMER
  64. play_path = 'stories/{0:}/{1:}'.format(
  65. speaker_id, story_filename)
  66. else:
  67. mp4_url = '{0:}{1:}{2:}/{3:}.mp4'.format(
  68. self._VIDEO_DOMAIN, ms_prefix, speaker_id, story_filename)
  69. rtmp_ext = 'mp4'
  70. streamer = self._USER_STREAMER
  71. play_path = 'mp4:{0:}{1:}/{2}.mp4'.format(
  72. ms_prefix, speaker_id, story_filename)
  73. formats = [{
  74. 'format_id': 'mp4_sd',
  75. 'url': mp4_url,
  76. }, {
  77. 'format_id': 'rtmp_sd',
  78. 'page_url': url,
  79. 'url': streamer,
  80. 'ext': rtmp_ext,
  81. 'play_path': play_path,
  82. }]
  83. self._sort_formats(formats)
  84. return {
  85. 'id': story_id,
  86. 'title': title,
  87. 'formats': formats,
  88. 'thumbnail': thumbnail,
  89. 'description': description,
  90. 'duration': duration,
  91. }