# encoding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import (
    compat_urllib_parse,
    compat_urllib_request,
    compat_urlparse,
)
from ..utils import (
    ExtractorError,
)


class Vbox7IE(InfoExtractor):
    _VALID_URL = r'http://(?:www\.)?vbox7\.com/play:(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://vbox7.com/play:249bb972c2',
        'md5': '99f65c0c9ef9b682b97313e052734c3f',
        'info_dict': {
            'id': '249bb972c2',
            'ext': 'mp4',
            'title': 'Смях! Чудо - чист за секунди - Скрита камера',
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        # the page has to be fetched three times to obtain the correct
        # jsSecretToken cookie, which in turn is needed for the correct title
        def get_session_id():
            redirect_page = self._download_webpage(url, video_id)
            session_id_url = self._search_regex(
                r'var\s*url\s*=\s*\'([^\']+)\';', redirect_page,
                'session id url')
            self._download_webpage(
                compat_urlparse.urljoin(url, session_id_url), video_id,
                'Getting session id')

        # two warm-up fetches here plus the real download below make three
        get_session_id()
        get_session_id()

        webpage = self._download_webpage(
            url, video_id, 'Downloading redirect page')

        title = self._html_search_regex(
            r'<title>(.*)</title>', webpage, 'title').split('/')[0].strip()

        info_url = 'http://vbox7.com/play/magare.do'
        data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id}).encode('ascii')
        info_request = compat_urllib_request.Request(info_url, data)
        info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        info_response = self._download_webpage(
            info_request, video_id, 'Downloading info webpage')
        if info_response is None:
            raise ExtractorError('Unable to extract the media url')
        # the response is two '&'-separated key=value pairs (media URL first,
        # thumbnail URL second); keep only the value part of each pair
        (final_url, thumbnail_url) = map(lambda x: x.split('=')[1], info_response.split('&'))

        return {
            'id': video_id,
            'url': final_url,
            'title': title,
            'thumbnail': thumbnail_url,
        }
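
For reference, a minimal sketch of how an extractor like this is normally driven through youtube-dl's public YoutubeDL API, run outside this module; the URL is taken from the _TEST block above, and the options shown are illustrative assumptions rather than requirements of this file:

import youtube_dl

# YoutubeDL matches the URL against each extractor's _VALID_URL pattern and
# dispatches vbox7.com play URLs to Vbox7IE._real_extract()
with youtube_dl.YoutubeDL({'quiet': True}) as ydl:
    info = ydl.extract_info('http://vbox7.com/play:249bb972c2', download=False)
    print(info['title'], info['url'])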