You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

115 lines
3.8 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. determine_ext,
  6. int_or_none,
  7. str_or_none,
  8. )
  9. class SverigesRadioBaseIE(InfoExtractor):
  10. _BASE_URL = 'https://sverigesradio.se/sida/playerajax/'
  11. _QUALITIES = ['low', 'medium', 'high']
  12. _EXT_TO_CODEC_MAP = {
  13. 'mp3': 'mp3',
  14. 'm4a': 'aac',
  15. }
  16. _CODING_FORMAT_TO_ABR_MAP = {
  17. 5: 128,
  18. 11: 192,
  19. 12: 32,
  20. 13: 96,
  21. }
  22. def _real_extract(self, url):
  23. audio_id = self._match_id(url)
  24. query = {
  25. 'id': audio_id,
  26. 'type': self._AUDIO_TYPE,
  27. }
  28. item = self._download_json(
  29. self._BASE_URL + 'audiometadata', audio_id,
  30. 'Downloading audio JSON metadata', query=query)['items'][0]
  31. title = item['subtitle']
  32. query['format'] = 'iis'
  33. urls = []
  34. formats = []
  35. for quality in self._QUALITIES:
  36. query['quality'] = quality
  37. audio_url_data = self._download_json(
  38. self._BASE_URL + 'getaudiourl', audio_id,
  39. 'Downloading %s format JSON metadata' % quality,
  40. fatal=False, query=query) or {}
  41. audio_url = audio_url_data.get('audioUrl')
  42. if not audio_url or audio_url in urls:
  43. continue
  44. urls.append(audio_url)
  45. ext = determine_ext(audio_url)
  46. coding_format = audio_url_data.get('codingFormat')
  47. abr = int_or_none(self._search_regex(
  48. r'_a(\d+)\.m4a', audio_url, 'audio bitrate',
  49. default=None)) or self._CODING_FORMAT_TO_ABR_MAP.get(coding_format)
  50. formats.append({
  51. 'abr': abr,
  52. 'acodec': self._EXT_TO_CODEC_MAP.get(ext),
  53. 'ext': ext,
  54. 'format_id': str_or_none(coding_format),
  55. 'vcodec': 'none',
  56. 'url': audio_url,
  57. })
  58. self._sort_formats(formats)
  59. return {
  60. 'id': audio_id,
  61. 'title': title,
  62. 'formats': formats,
  63. 'series': item.get('title'),
  64. 'duration': int_or_none(item.get('duration')),
  65. 'thumbnail': item.get('displayimageurl'),
  66. 'description': item.get('description'),
  67. }
  68. class SverigesRadioPublicationIE(SverigesRadioBaseIE):
  69. IE_NAME = 'sverigesradio:publication'
  70. _VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/sida/(?:artikel|gruppsida)\.aspx\?.*?\bartikel=(?P<id>[0-9]+)'
  71. _TESTS = [{
  72. 'url': 'https://sverigesradio.se/sida/artikel.aspx?programid=83&artikel=7038546',
  73. 'md5': '6a4917e1923fccb080e5a206a5afa542',
  74. 'info_dict': {
  75. 'id': '7038546',
  76. 'ext': 'm4a',
  77. 'duration': 132,
  78. 'series': 'Nyheter (Ekot)',
  79. 'title': 'Esa Teittinen: Sanningen har inte kommit fram',
  80. 'description': 'md5:daf7ce66a8f0a53d5465a5984d3839df',
  81. 'thumbnail': r're:^https?://.*\.jpg',
  82. },
  83. }, {
  84. 'url': 'https://sverigesradio.se/sida/gruppsida.aspx?programid=3304&grupp=6247&artikel=7146887',
  85. 'only_matching': True,
  86. }]
  87. _AUDIO_TYPE = 'publication'
  88. class SverigesRadioEpisodeIE(SverigesRadioBaseIE):
  89. IE_NAME = 'sverigesradio:episode'
  90. _VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/(?:sida/)?avsnitt/(?P<id>[0-9]+)'
  91. _TEST = {
  92. 'url': 'https://sverigesradio.se/avsnitt/1140922?programid=1300',
  93. 'md5': '20dc4d8db24228f846be390b0c59a07c',
  94. 'info_dict': {
  95. 'id': '1140922',
  96. 'ext': 'mp3',
  97. 'duration': 3307,
  98. 'series': 'Konflikt',
  99. 'title': 'Metoo och valen',
  100. 'description': 'md5:fcb5c1f667f00badcc702b196f10a27e',
  101. 'thumbnail': r're:^https?://.*\.jpg',
  102. }
  103. }
  104. _AUDIO_TYPE = 'episode'