You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

125 lines
4.3 KiB

  1. from __future__ import unicode_literals
  2. import re
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. int_or_none,
  6. unified_strdate,
  7. unescapeHTML,
  8. )
  9. class UstudioIE(InfoExtractor):
  10. IE_NAME = 'ustudio'
  11. _VALID_URL = r'https?://(?:(?:www|v1)\.)?ustudio\.com/video/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)'
  12. _TEST = {
  13. 'url': 'http://ustudio.com/video/Uxu2my9bgSph/san_francisco_golden_gate_bridge',
  14. 'md5': '58bbfca62125378742df01fc2abbdef6',
  15. 'info_dict': {
  16. 'id': 'Uxu2my9bgSph',
  17. 'display_id': 'san_francisco_golden_gate_bridge',
  18. 'ext': 'mp4',
  19. 'title': 'San Francisco: Golden Gate Bridge',
  20. 'description': 'md5:23925500697f2c6d4830e387ba51a9be',
  21. 'thumbnail': r're:^https?://.*\.jpg$',
  22. 'upload_date': '20111107',
  23. 'uploader': 'Tony Farley',
  24. }
  25. }
  26. def _real_extract(self, url):
  27. video_id, display_id = re.match(self._VALID_URL, url).groups()
  28. config = self._download_xml(
  29. 'http://v1.ustudio.com/embed/%s/ustudio/config.xml' % video_id,
  30. display_id)
  31. def extract(kind):
  32. return [{
  33. 'url': unescapeHTML(item.attrib['url']),
  34. 'width': int_or_none(item.get('width')),
  35. 'height': int_or_none(item.get('height')),
  36. } for item in config.findall('./qualities/quality/%s' % kind) if item.get('url')]
  37. formats = extract('video')
  38. self._sort_formats(formats)
  39. webpage = self._download_webpage(url, display_id)
  40. title = self._og_search_title(webpage)
  41. upload_date = unified_strdate(self._search_regex(
  42. r'(?s)Uploaded by\s*.+?\s*on\s*<span>([^<]+)</span>',
  43. webpage, 'upload date', fatal=False))
  44. uploader = self._search_regex(
  45. r'Uploaded by\s*<a[^>]*>([^<]+)<',
  46. webpage, 'uploader', fatal=False)
  47. return {
  48. 'id': video_id,
  49. 'display_id': display_id,
  50. 'title': title,
  51. 'description': self._og_search_description(webpage),
  52. 'thumbnails': extract('image'),
  53. 'upload_date': upload_date,
  54. 'uploader': uploader,
  55. 'formats': formats,
  56. }
  57. class UstudioEmbedIE(InfoExtractor):
  58. IE_NAME = 'ustudio:embed'
  59. _VALID_URL = r'https?://(?:(?:app|embed)\.)?ustudio\.com/embed/(?P<uid>[^/]+)/(?P<id>[^/]+)'
  60. _TEST = {
  61. 'url': 'http://app.ustudio.com/embed/DeN7VdYRDKhP/Uw7G1kMCe65T',
  62. 'md5': '47c0be52a09b23a7f40de9469cec58f4',
  63. 'info_dict': {
  64. 'id': 'Uw7G1kMCe65T',
  65. 'ext': 'mp4',
  66. 'title': '5 Things IT Should Know About Video',
  67. 'description': 'md5:93d32650884b500115e158c5677d25ad',
  68. 'uploader_id': 'DeN7VdYRDKhP',
  69. }
  70. }
  71. def _real_extract(self, url):
  72. uploader_id, video_id = re.match(self._VALID_URL, url).groups()
  73. video_data = self._download_json(
  74. 'http://app.ustudio.com/embed/%s/%s/config.json' % (uploader_id, video_id),
  75. video_id)['videos'][0]
  76. title = video_data['name']
  77. formats = []
  78. for ext, qualities in video_data.get('transcodes', {}).items():
  79. for quality in qualities:
  80. quality_url = quality.get('url')
  81. if not quality_url:
  82. continue
  83. height = int_or_none(quality.get('height'))
  84. formats.append({
  85. 'format_id': '%s-%dp' % (ext, height) if height else ext,
  86. 'url': quality_url,
  87. 'width': int_or_none(quality.get('width')),
  88. 'height': height,
  89. })
  90. self._sort_formats(formats)
  91. thumbnails = []
  92. for image in video_data.get('images', []):
  93. image_url = image.get('url')
  94. if not image_url:
  95. continue
  96. thumbnails.append({
  97. 'url': image_url,
  98. })
  99. return {
  100. 'id': video_id,
  101. 'title': title,
  102. 'description': video_data.get('description'),
  103. 'duration': int_or_none(video_data.get('duration')),
  104. 'uploader_id': uploader_id,
  105. 'tags': video_data.get('keywords'),
  106. 'thumbnails': thumbnails,
  107. 'formats': formats,
  108. }