You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

44 lines
1.9 KiB

  1. import re
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. compat_urllib_parse,
  5. )
  class CSpanIE(InfoExtractor):
      """Information extractor for program pages on www.c-spanvideo.org."""

      # Dots in the host name are escaped so they only match a literal '.'.
      # Group 1 captures everything that follows '/program/'.
      _VALID_URL = r'http://www\.c-spanvideo\.org/program/(.*)'

      def _real_extract(self, url):
          """Build the info dictionary for the program page at ``url``.

          The program page is downloaded to find its ``programid``; that id
          is then sent to the ``flashXml.php`` service, whose response
          supplies the title, the stream URL and the rtmp play path. The
          description and thumbnail are read from the program page's meta
          tags.

          Returns a dict with the keys ``id``, ``title``, ``ext``, ``url``,
          ``play_path``, ``description`` and ``thumbnail``.

          NOTE(review): ``url`` is assumed to match ``_VALID_URL`` (otherwise
          ``re.match`` returns None and ``group`` fails) -- presumably the
          caller guarantees this; confirm against the base class.
          """
          mobj = re.match(self._VALID_URL, url)
          prog_name = mobj.group(1)

          webpage = self._download_webpage(url, prog_name)
          video_id = self._search_regex(r'programid=(.*?)&', webpage, 'video id')

          # Query string for the service that describes the video stream.
          data = compat_urllib_parse.urlencode({'programid': video_id,
                                                'dynamic': '1'})
          info_url = 'http://www.c-spanvideo.org/common/services/flashXml.php?' + data
          video_info = self._download_webpage(info_url, video_id, u'Downloading video info')

          self.report_extraction(video_id)

          title = self._html_search_regex(r'<string name="title">(.*?)</string>',
                                          video_info, 'title')
          description = self._html_search_regex(r'<meta (?:property="og:|name=")description" content="(.*?)"',
                                                webpage, 'description',
                                                flags=re.MULTILINE|re.DOTALL)
          thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.*?)"',
                                              webpage, 'thumbnail')

          # Kept in its own name so the ``url`` argument (the page URL) is
          # not overwritten by the stream URL.
          video_url = self._search_regex(r'<string name="URL">(.*?)</string>',
                                         video_info, 'video url')
          # The service returns a template; fill in the protocol and port
          # placeholders to obtain the final stream address.
          video_url = video_url.replace('$(protocol)', 'rtmp').replace('$(port)', '443')
          path = self._search_regex(r'<string name="path">(.*?)</string>',
                                    video_info, 'rtmp play path')

          return {'id': video_id,
                  'title': title,
                  'ext': 'flv',
                  'url': video_url,
                  'play_path': path,
                  'description': description,
                  'thumbnail': thumbnail,
                  }