You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

67 lines
2.2 KiB

  1. # coding: utf-8
  2. import re
  3. from ..utils import (
  4. compat_urllib_request,
  5. compat_urllib_parse
  6. )
  7. from .common import InfoExtractor
  8. class WeBSurgIE(InfoExtractor):
  9. IE_NAME = u'websurg.com'
  10. _VALID_URL = r'http://.*?\.websurg\.com/MEDIA/\?noheader=1&doi=(.*)'
  11. _TEST = {
  12. u'url': u'http://www.websurg.com/MEDIA/?noheader=1&doi=vd01en4012',
  13. u'file': u'vd01en4012.mp4',
  14. u'params': {
  15. u'skip_download': True,
  16. }
  17. }
  18. _LOGIN_URL = 'http://www.websurg.com/inc/login/login_div.ajax.php?login=1'
  19. def _real_extract(self, url):
  20. login_form = {
  21. 'username': self._downloader.params['username'],
  22. 'password': self._downloader.params['password'],
  23. 'Submit': 1
  24. }
  25. request = compat_urllib_request.Request(
  26. self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
  27. request.add_header(
  28. 'Content-Type', 'application/x-www-form-urlencoded;charset=utf-8')
  29. login_results = compat_urllib_request.urlopen(request).info()
  30. sessid = re.match(r'PHPSESSID=(.*);',
  31. login_results['Set-Cookie']).group(1)
  32. request = compat_urllib_request.Request(
  33. url, compat_urllib_parse.urlencode(login_form),
  34. {'Cookie': 'PHPSESSID=' + sessid + ';'})
  35. webpage = compat_urllib_request.urlopen(request).read()
  36. video_id = re.match(self._VALID_URL, url).group(1)
  37. url_info = re.search(r'streamer="(.*?)" src="(.*?)"', webpage)
  38. if url_info is None:
  39. self._downloader.report_warning(
  40. u'Unable to log in: bad username/password')
  41. return
  42. return {'id': video_id,
  43. 'title' : re.search(
  44. r'property="og:title" content="(.*?)" />'
  45. , webpage).group(1),
  46. 'description': re.search(
  47. r'name="description" content="(.*?)" />', webpage).group(1),
  48. 'ext' : 'mp4',
  49. 'url' : url_info.group(1) + '/' + url_info.group(2),
  50. 'thumbnail': re.search(
  51. r'property="og:image" content="(.*?)" />', webpage
  52. ).group(1)
  53. }