You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

99 lines
3.5 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. int_or_none,
  7. parse_duration,
  8. parse_iso8601,
  9. clean_html,
  10. )
  class CCMAIE(InfoExtractor):
      """Information extractor for video and audio pages on ccma.cat.

      The media type (``video`` or ``audio``) and the numeric media id are
      taken from the page URL; the metadata and stream URLs are then fetched
      as JSON from ``http://dinamics.ccma.cat/pvideo/media.jsp``.
      """

      _VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)'
      _TESTS = [{
          'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/',
          'md5': '7296ca43977c8ea4469e719c609b0871',
          'info_dict': {
              'id': '5630208',
              'ext': 'mp4',
              'title': 'L\'espot de La Marató de TV3',
              'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
              'timestamp': 1470918540,
              'upload_date': '20160811',
          }
      }, {
          'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
          'md5': 'fa3e38f269329a278271276330261425',
          'info_dict': {
              'id': '943685',
              'ext': 'mp3',
              'title': 'El Consell de Savis analitza el derbi',
              'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
              'upload_date': '20171205',
              'timestamp': 1512507300,
          }
      }]

      def _real_extract(self, url):
          """Build the info dict for the media item referenced by *url*.

          One JSON document is requested per profile (``pc`` only for audio,
          ``mobil`` then ``pc`` for video).  Every successful response
          contributes at most one format, and the last successful response
          also supplies the metadata (title, description, duration,
          timestamp, subtitles and thumbnail).

          Raises KeyError when no response carried an ``informacio`` object
          or when that object has no ``titol``; both are treated as
          mandatory.
          """
          media_type, media_id = re.match(self._VALID_URL, url).groups()

          media_data = {}
          formats = []
          profiles = ['pc'] if media_type == 'audio' else ['mobil', 'pc']
          for quality, profile in enumerate(profiles):
              # fatal=False: a failing profile must not abort the extraction,
              # the remaining profiles are still tried.
              md = self._download_json('http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
                  'media': media_type,
                  'idint': media_id,
                  'profile': profile,
              }, fatal=False)
              if not md:
                  continue
              media_data = md
              # ``or {}`` also covers an explicit JSON null, which a plain
              # ``.get('media', {})`` would pass through as None.
              media_url = (media_data.get('media') or {}).get('url')
              if media_url:
                  formats.append({
                      'format_id': profile,
                      'url': media_url,
                      # Position in ``profiles`` doubles as the quality value.
                      'quality': quality,
                  })
          self._sort_formats(formats)

          # Mandatory metadata: deliberately accessed with [] so that a
          # missing value surfaces as an error instead of an empty title.
          informacio = media_data['informacio']
          title = informacio['titol']

          durada = informacio.get('durada') or {}
          # Prefer the millisecond value (scaled to seconds); fall back to
          # parsing the textual duration.
          duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
          timestamp = parse_iso8601((informacio.get('data_emissio') or {}).get('utc'))

          subtitles = {}
          subtitols = media_data.get('subtitols') or {}
          sub_url = subtitols.get('url')
          if sub_url:
              # Language key: 'iso' if present, else 'text', else 'ca'.
              subtitles.setdefault(
                  subtitols.get('iso') or subtitols.get('text') or 'ca', []).append({
                      'url': sub_url,
                  })

          thumbnails = []
          imatges = media_data.get('imatges') or {}
          thumbnail_url = imatges.get('url')
          if thumbnail_url:
              thumbnails = [{
                  'url': thumbnail_url,
                  'width': int_or_none(imatges.get('amplada')),
                  'height': int_or_none(imatges.get('alcada')),
              }]

          return {
              'id': media_id,
              'title': title,
              'description': clean_html(informacio.get('descripcio')),
              'duration': duration,
              'timestamp': timestamp,
              # Was misspelled 'thumnails', so the list never reached the
              # consumer under the expected key.
              'thumbnails': thumbnails,
              'subtitles': subtitles,
              'formats': formats,
          }