You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

101 lines
3.2 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import json
  4. import re
  5. from .common import InfoExtractor
  6. from ..utils import ExtractorError
  7. class TuneInIE(InfoExtractor):
  8. _VALID_URL = r'''(?x)https?://(?:www\.)?
  9. (?:
  10. tunein\.com/
  11. (?:
  12. radio/.*?-s|
  13. station/.*?StationId\=
  14. )(?P<id>[0-9]+)
  15. |tun\.in/(?P<redirect_id>[A-Za-z0-9]+)
  16. )
  17. '''
  18. _INFO_DICT = {
  19. 'id': '34682',
  20. 'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
  21. 'ext': 'AAC',
  22. 'thumbnail': 're:^https?://.*\.png$',
  23. 'location': 'Tacoma, WA',
  24. }
  25. _TESTS = [
  26. {
  27. 'url': 'http://tunein.com/radio/Jazz24-885-s34682/',
  28. 'info_dict': _INFO_DICT,
  29. 'params': {
  30. 'skip_download': True, # live stream
  31. },
  32. },
  33. { # test redirection
  34. 'url': 'http://tun.in/ser7s',
  35. 'info_dict': _INFO_DICT,
  36. 'params': {
  37. 'skip_download': True, # live stream
  38. },
  39. },
  40. ]
  41. def _real_extract(self, url):
  42. mobj = re.match(self._VALID_URL, url)
  43. redirect_id = mobj.group('redirect_id')
  44. if redirect_id:
  45. # The server doesn't support HEAD requests
  46. urlh = self._request_webpage(
  47. url, redirect_id, note='Downloading redirect page')
  48. url = urlh.geturl()
  49. self.to_screen('Following redirect: %s' % url)
  50. mobj = re.match(self._VALID_URL, url)
  51. station_id = mobj.group('id')
  52. webpage = self._download_webpage(
  53. url, station_id, note='Downloading station webpage')
  54. payload = self._html_search_regex(
  55. r'(?m)TuneIn\.payload\s*=\s*(\{[^$]+?)$', webpage, 'JSON data')
  56. json_data = json.loads(payload)
  57. station_info = json_data['Station']['broadcast']
  58. title = station_info['Title']
  59. thumbnail = station_info.get('Logo')
  60. location = station_info.get('Location')
  61. streams_url = station_info.get('StreamUrl')
  62. if not streams_url:
  63. raise ExtractorError('No downloadable streams found',
  64. expected=True)
  65. stream_data = self._download_webpage(
  66. streams_url, station_id, note='Downloading stream data')
  67. streams = json.loads(self._search_regex(
  68. r'\((.*)\);', stream_data, 'stream info'))['Streams']
  69. is_live = None
  70. formats = []
  71. for stream in streams:
  72. if stream.get('Type') == 'Live':
  73. is_live = True
  74. formats.append({
  75. 'abr': stream.get('Bandwidth'),
  76. 'ext': stream.get('MediaType'),
  77. 'acodec': stream.get('MediaType'),
  78. 'vcodec': 'none',
  79. 'url': stream.get('Url'),
  80. # Sometimes streams with the highest quality do not exist
  81. 'preference': stream.get('Reliability'),
  82. })
  83. self._sort_formats(formats)
  84. return {
  85. 'id': station_id,
  86. 'title': title,
  87. 'formats': formats,
  88. 'thumbnail': thumbnail,
  89. 'location': location,
  90. 'is_live': is_live,
  91. }