You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

89 lines
3.0 KiB

  1. from __future__ import unicode_literals
  2. from .common import InfoExtractor
  3. from ..compat import (
  4. compat_urllib_parse_urlencode,
  5. compat_urlparse,
  6. )
  7. from ..utils import (
  8. get_element_by_attribute,
  9. int_or_none,
  10. )
  11. class MiTeleIE(InfoExtractor):
  12. IE_DESC = 'mitele.es'
  13. _VALID_URL = r'https?://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
  14. _TESTS = [{
  15. 'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
  16. 'md5': '0ff1a13aebb35d9bc14081ff633dd324',
  17. 'info_dict': {
  18. 'id': '0NF1jJnxS1Wu3pHrmvFyw2',
  19. 'display_id': 'programa-144',
  20. 'ext': 'flv',
  21. 'title': 'Tor, la web invisible',
  22. 'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
  23. 'thumbnail': 're:(?i)^https?://.*\.jpg$',
  24. 'duration': 2913,
  25. },
  26. }]
  27. def _real_extract(self, url):
  28. display_id = self._match_id(url)
  29. webpage = self._download_webpage(url, display_id)
  30. config_url = self._search_regex(
  31. r'data-config\s*=\s*"([^"]+)"', webpage, 'data config url')
  32. config_url = compat_urlparse.urljoin(url, config_url)
  33. config = self._download_json(
  34. config_url, display_id, 'Downloading config JSON')
  35. mmc = self._download_json(
  36. config['services']['mmc'], display_id, 'Downloading mmc JSON')
  37. formats = []
  38. for location in mmc['locations']:
  39. gat = self._proto_relative_url(location.get('gat'), 'http:')
  40. bas = location.get('bas')
  41. loc = location.get('loc')
  42. ogn = location.get('ogn')
  43. if None in (gat, bas, loc, ogn):
  44. continue
  45. token_data = {
  46. 'bas': bas,
  47. 'icd': loc,
  48. 'ogn': ogn,
  49. 'sta': '0',
  50. }
  51. media = self._download_json(
  52. '%s/?%s' % (gat, compat_urllib_parse_urlencode(token_data)),
  53. display_id, 'Downloading %s JSON' % location['loc'])
  54. file_ = media.get('file')
  55. if not file_:
  56. continue
  57. formats.extend(self._extract_f4m_formats(
  58. file_ + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
  59. display_id, f4m_id=loc))
  60. self._sort_formats(formats)
  61. title = self._search_regex(
  62. r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', webpage, 'title')
  63. video_id = self._search_regex(
  64. r'data-media-id\s*=\s*"([^"]+)"', webpage,
  65. 'data media id', default=None) or display_id
  66. thumbnail = config.get('poster', {}).get('imageUrl')
  67. duration = int_or_none(mmc.get('duration'))
  68. return {
  69. 'id': video_id,
  70. 'display_id': display_id,
  71. 'title': title,
  72. 'description': get_element_by_attribute('class', 'text', webpage),
  73. 'thumbnail': thumbnail,
  74. 'duration': duration,
  75. 'formats': formats,
  76. }