You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

51 lines
2.1 KiB

  1. import re
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. compat_urllib_parse,
  5. )
  6. class CSpanIE(InfoExtractor):
  7. _VALID_URL = r'http://www.c-spanvideo.org/program/(.*)'
  8. _TEST = {
  9. u'url': u'http://www.c-spanvideo.org/program/HolderonV',
  10. u'file': u'315139.flv',
  11. u'md5': u'74a623266956f69e4df0068ab6c80fe4',
  12. u'info_dict': {
  13. u"title": u"Attorney General Eric Holder on Voting Rights Act Decision"
  14. },
  15. u'skip': u'Requires rtmpdump'
  16. }
  17. def _real_extract(self, url):
  18. mobj = re.match(self._VALID_URL, url)
  19. prog_name = mobj.group(1)
  20. webpage = self._download_webpage(url, prog_name)
  21. video_id = self._search_regex(r'programid=(.*?)&', webpage, 'video id')
  22. data = compat_urllib_parse.urlencode({'programid': video_id,
  23. 'dynamic':'1'})
  24. info_url = 'http://www.c-spanvideo.org/common/services/flashXml.php?' + data
  25. video_info = self._download_webpage(info_url, video_id, u'Downloading video info')
  26. self.report_extraction(video_id)
  27. title = self._html_search_regex(r'<string name="title">(.*?)</string>',
  28. video_info, 'title')
  29. description = self._html_search_regex(r'<meta (?:property="og:|name=")description" content="(.*?)"',
  30. webpage, 'description',
  31. flags=re.MULTILINE|re.DOTALL)
  32. url = self._search_regex(r'<string name="URL">(.*?)</string>',
  33. video_info, 'video url')
  34. url = url.replace('$(protocol)', 'rtmp').replace('$(port)', '443')
  35. path = self._search_regex(r'<string name="path">(.*?)</string>',
  36. video_info, 'rtmp play path')
  37. return {'id': video_id,
  38. 'title': title,
  39. 'ext': 'flv',
  40. 'url': url,
  41. 'play_path': path,
  42. 'description': description,
  43. 'thumbnail': self._og_search_thumbnail(webpage),
  44. }