You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

109 lines
3.6 KiB

7 years ago
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. clean_html,
  7. int_or_none,
  8. parse_duration,
  9. parse_iso8601,
  10. parse_resolution,
  11. url_or_none,
  12. )
  13. class CCMAIE(InfoExtractor):
  14. _VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)'
  15. _TESTS = [{
  16. 'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/',
  17. 'md5': '7296ca43977c8ea4469e719c609b0871',
  18. 'info_dict': {
  19. 'id': '5630208',
  20. 'ext': 'mp4',
  21. 'title': 'L\'espot de La Marató de TV3',
  22. 'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
  23. 'timestamp': 1470918540,
  24. 'upload_date': '20160811',
  25. }
  26. }, {
  27. 'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
  28. 'md5': 'fa3e38f269329a278271276330261425',
  29. 'info_dict': {
  30. 'id': '943685',
  31. 'ext': 'mp3',
  32. 'title': 'El Consell de Savis analitza el derbi',
  33. 'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
  34. 'upload_date': '20171205',
  35. 'timestamp': 1512507300,
  36. }
  37. }]
  38. def _real_extract(self, url):
  39. media_type, media_id = re.match(self._VALID_URL, url).groups()
  40. media = self._download_json(
  41. 'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
  42. 'media': media_type,
  43. 'idint': media_id,
  44. })
  45. formats = []
  46. media_url = media['media']['url']
  47. if isinstance(media_url, list):
  48. for format_ in media_url:
  49. format_url = url_or_none(format_.get('file'))
  50. if not format_url:
  51. continue
  52. label = format_.get('label')
  53. f = parse_resolution(label)
  54. f.update({
  55. 'url': format_url,
  56. 'format_id': label,
  57. })
  58. formats.append(f)
  59. else:
  60. formats.append({
  61. 'url': media_url,
  62. 'vcodec': 'none' if media_type == 'audio' else None,
  63. })
  64. self._sort_formats(formats)
  65. informacio = media['informacio']
  66. title = informacio['titol']
  67. durada = informacio.get('durada', {})
  68. duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
  69. timestamp = parse_iso8601(informacio.get('data_emissio', {}).get('utc'))
  70. subtitles = {}
  71. subtitols = media.get('subtitols', {})
  72. if subtitols:
  73. sub_url = subtitols.get('url')
  74. if sub_url:
  75. subtitles.setdefault(
  76. subtitols.get('iso') or subtitols.get('text') or 'ca', []).append({
  77. 'url': sub_url,
  78. })
  79. thumbnails = []
  80. imatges = media.get('imatges', {})
  81. if imatges:
  82. thumbnail_url = imatges.get('url')
  83. if thumbnail_url:
  84. thumbnails = [{
  85. 'url': thumbnail_url,
  86. 'width': int_or_none(imatges.get('amplada')),
  87. 'height': int_or_none(imatges.get('alcada')),
  88. }]
  89. return {
  90. 'id': media_id,
  91. 'title': title,
  92. 'description': clean_html(informacio.get('descripcio')),
  93. 'duration': duration,
  94. 'timestamp': timestamp,
  95. 'thumbnails': thumbnails,
  96. 'subtitles': subtitles,
  97. 'formats': formats,
  98. }