You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

74 lines
2.2 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import compat_chr
  6. from ..utils import (
  7. decode_packed_codes,
  8. ExtractorError,
  9. )
  10. class VShareIE(InfoExtractor):
  11. _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
  12. _TESTS = [{
  13. 'url': 'https://vshare.io/d/0f64ce6',
  14. 'md5': '17b39f55b5497ae8b59f5fbce8e35886',
  15. 'info_dict': {
  16. 'id': '0f64ce6',
  17. 'title': 'vl14062007715967',
  18. 'ext': 'mp4',
  19. }
  20. }, {
  21. 'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',
  22. 'only_matching': True,
  23. }]
  24. @staticmethod
  25. def _extract_urls(webpage):
  26. return re.findall(
  27. r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)',
  28. webpage)
  29. def _extract_packed(self, webpage):
  30. packed = self._search_regex(
  31. r'(eval\(function.+)', webpage, 'packed code')
  32. unpacked = decode_packed_codes(packed)
  33. digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits')
  34. digits = [int(digit) for digit in digits.split(',')]
  35. key_digit = self._search_regex(
  36. r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit')
  37. chars = [compat_chr(d - int(key_digit)) for d in digits]
  38. return ''.join(chars)
  39. def _real_extract(self, url):
  40. video_id = self._match_id(url)
  41. webpage = self._download_webpage(
  42. 'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
  43. video_id)
  44. title = self._html_search_regex(
  45. r'<title>([^<]+)</title>', webpage, 'title')
  46. title = title.split(' - ')[0]
  47. error = self._html_search_regex(
  48. r'(?s)<div[^>]+\bclass=["\']xxx-error[^>]+>(.+?)</div', webpage,
  49. 'error', default=None)
  50. if error:
  51. raise ExtractorError(error, expected=True)
  52. info = self._parse_html5_media_entries(
  53. url, '<video>%s</video>' % self._extract_packed(webpage),
  54. video_id)[0]
  55. self._sort_formats(info['formats'])
  56. info.update({
  57. 'id': video_id,
  58. 'title': title,
  59. })
  60. return info