You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

54 lines
2.0 KiB

  1. # coding: utf-8
  2. import re
  3. import xml.etree.ElementTree
  4. import json
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. compat_urlparse,
  8. ExtractorError,
  9. find_xpath_attr,
  10. )
  11. class ORFIE(InfoExtractor):
  12. _VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'
  13. def _real_extract(self, url):
  14. mobj = re.match(self._VALID_URL, url)
  15. playlist_id = mobj.group('id')
  16. webpage = self._download_webpage(url, playlist_id)
  17. flash_xml = self._search_regex('ORF.flashXML = \'(.+?)\'', webpage, u'flash xml')
  18. flash_xml = compat_urlparse.parse_qs('xml='+flash_xml)['xml'][0]
  19. flash_config = xml.etree.ElementTree.fromstring(flash_xml.encode('utf-8'))
  20. playlist_json = self._search_regex(r'playlist\': \'(\[.*?\])\'', webpage, u'playlist').replace(r'\"','"')
  21. playlist = json.loads(playlist_json)
  22. videos = []
  23. ns = '{http://tempuri.org/XMLSchema.xsd}'
  24. xpath = '%(ns)sPlaylist/%(ns)sItems/%(ns)sItem' % {'ns': ns}
  25. webpage_description = self._og_search_description(webpage)
  26. for (i, (item, info)) in enumerate(zip(flash_config.findall(xpath), playlist), 1):
  27. # Get best quality url
  28. rtmp_url = None
  29. for q in ['Q6A', 'Q4A', 'Q1A']:
  30. video_url = find_xpath_attr(item, '%sVideoUrl' % ns, 'quality', q)
  31. if video_url is not None:
  32. rtmp_url = video_url.text
  33. break
  34. if rtmp_url is None:
  35. raise ExtractorError(u'Couldn\'t get video url: %s' % info['id'])
  36. description = self._html_search_regex(
  37. r'id="playlist_entry_%s".*?<p>(.*?)</p>' % i, webpage,
  38. u'description', default=webpage_description, flags=re.DOTALL)
  39. videos.append({
  40. '_type': 'video',
  41. 'id': info['id'],
  42. 'title': info['title'],
  43. 'url': rtmp_url,
  44. 'ext': 'flv',
  45. 'description': description,
  46. })
  47. return videos