You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

89 lines
2.9 KiB

  1. from __future__ import unicode_literals
  2. from .common import InfoExtractor
  3. from ..compat import (
  4. compat_urllib_parse,
  5. compat_urlparse,
  6. )
  7. from ..utils import (
  8. encode_dict,
  9. get_element_by_attribute,
  10. int_or_none,
  11. )
  12. class MiTeleIE(InfoExtractor):
  13. IE_DESC = 'mitele.es'
  14. _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
  15. _TESTS = [{
  16. 'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
  17. 'md5': '0ff1a13aebb35d9bc14081ff633dd324',
  18. 'info_dict': {
  19. 'id': '0NF1jJnxS1Wu3pHrmvFyw2',
  20. 'display_id': 'programa-144',
  21. 'ext': 'flv',
  22. 'title': 'Tor, la web invisible',
  23. 'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
  24. 'thumbnail': 're:(?i)^https?://.*\.jpg$',
  25. 'duration': 2913,
  26. },
  27. }]
  28. def _real_extract(self, url):
  29. display_id = self._match_id(url)
  30. webpage = self._download_webpage(url, display_id)
  31. config_url = self._search_regex(
  32. r'data-config\s*=\s*"([^"]+)"', webpage, 'data config url')
  33. config_url = compat_urlparse.urljoin(url, config_url)
  34. config = self._download_json(
  35. config_url, display_id, 'Downloading config JSON')
  36. mmc = self._download_json(
  37. config['services']['mmc'], display_id, 'Downloading mmc JSON')
  38. formats = []
  39. for location in mmc['locations']:
  40. gat = self._proto_relative_url(location.get('gat'), 'http:')
  41. bas = location.get('bas')
  42. loc = location.get('loc')
  43. ogn = location.get('ogn')
  44. if None in (gat, bas, loc, ogn):
  45. continue
  46. token_data = {
  47. 'bas': bas,
  48. 'icd': loc,
  49. 'ogn': ogn,
  50. 'sta': '0',
  51. }
  52. media = self._download_json(
  53. '%s/?%s' % (gat, compat_urllib_parse.urlencode(encode_dict(token_data))),
  54. display_id, 'Downloading %s JSON' % location['loc'])
  55. file_ = media.get('file')
  56. if not file_:
  57. continue
  58. formats.extend(self._extract_f4m_formats(
  59. file_ + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
  60. display_id, f4m_id=loc))
  61. title = self._search_regex(
  62. r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', webpage, 'title')
  63. video_id = self._search_regex(
  64. r'data-media-id\s*=\s*"([^"]+)"', webpage,
  65. 'data media id', default=None) or display_id
  66. thumbnail = config.get('poster', {}).get('imageUrl')
  67. duration = int_or_none(mmc.get('duration'))
  68. return {
  69. 'id': video_id,
  70. 'display_id': display_id,
  71. 'title': title,
  72. 'description': get_element_by_attribute('class', 'text', webpage),
  73. 'thumbnail': thumbnail,
  74. 'duration': duration,
  75. 'formats': formats,
  76. }