You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

91 lines
2.6 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. compat_urllib_parse,
  7. compat_urllib_request,
  8. parse_duration,
  9. )
  10. class ShareSixIE(InfoExtractor):
  11. _VALID_URL = r'https?://(?:www\.)?sharesix\.com/(?:f/)?(?P<id>[0-9a-zA-Z]+)'
  12. _TESTS = [
  13. {
  14. 'url': 'http://sharesix.com/f/OXjQ7Y6',
  15. 'md5': '9e8e95d8823942815a7d7c773110cc93',
  16. 'info_dict': {
  17. 'id': 'OXjQ7Y6',
  18. 'ext': 'mp4',
  19. 'title': 'big_buck_bunny_480p_surround-fix.avi',
  20. 'duration': 596,
  21. 'width': 854,
  22. 'height': 480,
  23. },
  24. },
  25. {
  26. 'url': 'http://sharesix.com/lfrwoxp35zdd',
  27. 'md5': 'dd19f1435b7cec2d7912c64beeee8185',
  28. 'info_dict': {
  29. 'id': 'lfrwoxp35zdd',
  30. 'ext': 'flv',
  31. 'title': 'WhiteBoard___a_Mac_vs_PC_Parody_Cartoon.mp4.flv',
  32. 'duration': 65,
  33. 'width': 1280,
  34. 'height': 720,
  35. },
  36. }
  37. ]
  38. def _real_extract(self, url):
  39. mobj = re.match(self._VALID_URL, url)
  40. video_id = mobj.group('id')
  41. fields = {
  42. 'method_free': 'Free'
  43. }
  44. post = compat_urllib_parse.urlencode(fields)
  45. req = compat_urllib_request.Request(url, post)
  46. req.add_header('Content-type', 'application/x-www-form-urlencoded')
  47. webpage = self._download_webpage(req, video_id,
  48. 'Downloading video page')
  49. video_url = self._search_regex(
  50. r"var\slnk1\s=\s'([^']+)'", webpage, 'video URL')
  51. title = self._html_search_regex(
  52. r'(?s)<dt>Filename:</dt>.+?<dd>(.+?)</dd>', webpage, 'title')
  53. duration = parse_duration(
  54. self._search_regex(
  55. r'(?s)<dt>Length:</dt>.+?<dd>(.+?)</dd>',
  56. webpage,
  57. 'duration',
  58. fatal=False
  59. )
  60. )
  61. m = re.search(
  62. r'''(?xs)<dt>Width\sx\sHeight</dt>.+?
  63. <dd>(?P<width>\d+)\sx\s(?P<height>\d+)</dd>''',
  64. webpage
  65. )
  66. width = height = None
  67. if m:
  68. width, height = int(m.group('width')), int(m.group('height'))
  69. formats = [{
  70. 'format_id': 'sd',
  71. 'url': video_url,
  72. 'width': width,
  73. 'height': height,
  74. }]
  75. return {
  76. 'id': video_id,
  77. 'title': title,
  78. 'duration': duration,
  79. 'formats': formats,
  80. }