You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

56 lines
1.8 KiB

  1. from __future__ import unicode_literals
  2. import re
  3. import base64
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. ExtractorError,
  7. compat_urllib_request,
  8. compat_urllib_parse,
  9. int_or_none,
  10. )
  class SharedIE(InfoExtractor):
      """Information extractor for videos hosted on shared.sx.

      The landing page only exposes a form made of hidden inputs; the page
      carrying the actual media URL is obtained by POSTing those inputs back
      to the same URL.
      """

      _VALID_URL = r'http://shared\.sx/(?P<id>[\da-z]{10})'
      _TEST = {
          'url': 'http://shared.sx/0060718775',
          'md5': '106fefed92a8a2adb8c98e6a0652f49b',
          'info_dict': {
              'id': '0060718775',
              'ext': 'mp4',
              'title': 'Bmp4',
          },
      }

      def _real_extract(self, url):
          """Return the info dict for the video at *url*.

          Steps: download the landing page, re-submit its hidden form fields
          as a POST to the same URL, then read the media URL and poster from
          the resulting page. Title and file size come from ``full:*`` meta
          tags on the landing page.

          Raises:
              ExtractorError: if the landing page reports that the file does
                  not exist. Lookups marked as fatal (video URL, title) are
                  delegated to the base-class helpers, which are expected to
                  raise when nothing matches.
          """
          video_id = re.match(self._VALID_URL, url).group('id')

          page = self._download_webpage(url, video_id)
          if '>File does not exist<' in page:
              raise ExtractorError(
                  'Video %s does not exist' % video_id, expected=True)

          # Collect every hidden <input> so the form can be submitted unchanged.
          download_form = dict(re.findall(
              r'<input type="hidden" name="([^"]+)" value="([^"]*)"', page))

          # urlencode() yields text; the POST body has to be bytes on Python 3,
          # otherwise urllib rejects the request with a TypeError.
          post_data = compat_urllib_parse.urlencode(download_form).encode('utf-8')
          request = compat_urllib_request.Request(url, post_data)
          request.add_header('Content-Type', 'application/x-www-form-urlencoded')

          video_page = self._download_webpage(
              request, video_id, 'Downloading video page')
          video_url = self._html_search_regex(
              r'data-url="([^"]+)"', video_page, 'video URL')

          # The title meta tag holds base64 text. It is mandatory, so a missing
          # tag must fail in the lookup itself instead of handing a non-string
          # to b64decode().
          encoded_title = self._html_search_meta(
              'full:title', page, 'title', fatal=True)
          title = base64.b64decode(encoded_title).decode('utf-8')

          filesize = int_or_none(
              self._html_search_meta('full:size', page, 'file size', fatal=False))
          thumbnail = self._html_search_regex(
              r'data-poster="([^"]+)"', video_page, 'thumbnail',
              fatal=False, default=None)

          return {
              'id': video_id,
              'url': video_url,
              'ext': 'mp4',
              'filesize': filesize,
              'title': title,
              'thumbnail': thumbnail,
          }