You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

216 lines
8.8 KiB

  1. from __future__ import unicode_literals
  2. import re
  3. from .subtitles import SubtitlesInfoExtractor
  4. from ..utils import ExtractorError
  5. class BBCCoUkIE(SubtitlesInfoExtractor):
  6. IE_NAME = 'bbc.co.uk'
  7. IE_DESC = 'BBC iPlayer'
  8. _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P<id>[\da-z]{8})'
  9. _TESTS = [
  10. {
  11. 'url': 'http://www.bbc.co.uk/programmes/p01q7wz1',
  12. 'info_dict': {
  13. 'id': 'p01q7wz4',
  14. 'ext': 'flv',
  15. 'title': 'Friction: Blu Mar Ten guest mix: Blu Mar Ten - Guest Mix',
  16. 'description': 'Blu Mar Ten deliver a Guest Mix for Friction.',
  17. 'duration': 1936,
  18. },
  19. 'params': {
  20. # rtmp download
  21. 'skip_download': True,
  22. }
  23. },
  24. {
  25. 'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
  26. 'info_dict': {
  27. 'id': 'b00yng1d',
  28. 'ext': 'flv',
  29. 'title': 'The Man in Black: Series 3: The Printed Name',
  30. 'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
  31. 'duration': 1800,
  32. },
  33. 'params': {
  34. # rtmp download
  35. 'skip_download': True,
  36. }
  37. },
  38. {
  39. 'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
  40. 'info_dict': {
  41. 'id': 'b00yng1d',
  42. 'ext': 'flv',
  43. 'title': 'The Voice UK: Series 3: Blind Auditions 5',
  44. 'description': "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.",
  45. 'duration': 5100,
  46. },
  47. 'params': {
  48. # rtmp download
  49. 'skip_download': True,
  50. },
  51. 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
  52. }
  53. ]
  54. def _extract_asx_playlist(self, connection, programme_id):
  55. asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
  56. return [ref.get('href') for ref in asx.findall('./Entry/ref')]
  57. def _extract_connection(self, connection, programme_id):
  58. formats = []
  59. protocol = connection.get('protocol')
  60. supplier = connection.get('supplier')
  61. if protocol == 'http':
  62. href = connection.get('href')
  63. # ASX playlist
  64. if supplier == 'asx':
  65. for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
  66. formats.append({
  67. 'url': ref,
  68. 'format_id': 'ref%s_%s' % (i, supplier),
  69. })
  70. # Direct link
  71. else:
  72. formats.append({
  73. 'url': href,
  74. 'format_id': supplier,
  75. })
  76. elif protocol == 'rtmp':
  77. application = connection.get('application', 'ondemand')
  78. auth_string = connection.get('authString')
  79. identifier = connection.get('identifier')
  80. server = connection.get('server')
  81. formats.append({
  82. 'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
  83. 'play_path': identifier,
  84. 'app': '%s?%s' % (application, auth_string),
  85. 'page_url': 'http://www.bbc.co.uk',
  86. 'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
  87. 'rtmp_live': False,
  88. 'ext': 'flv',
  89. 'format_id': supplier,
  90. })
  91. return formats
  92. def _extract_items(self, playlist):
  93. return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')
  94. def _extract_medias(self, media_selection):
  95. return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')
  96. def _extract_connections(self, media):
  97. return media.findall('./{http://bbc.co.uk/2008/mp/mediaselection}connection')
  98. def _extract_video(self, media, programme_id):
  99. formats = []
  100. vbr = int(media.get('bitrate'))
  101. vcodec = media.get('encoding')
  102. service = media.get('service')
  103. width = int(media.get('width'))
  104. height = int(media.get('height'))
  105. file_size = int(media.get('media_file_size'))
  106. for connection in self._extract_connections(media):
  107. conn_formats = self._extract_connection(connection, programme_id)
  108. for format in conn_formats:
  109. format.update({
  110. 'format_id': '%s_%s' % (service, format['format_id']),
  111. 'width': width,
  112. 'height': height,
  113. 'vbr': vbr,
  114. 'vcodec': vcodec,
  115. 'filesize': file_size,
  116. })
  117. formats.extend(conn_formats)
  118. return formats
  119. def _extract_audio(self, media, programme_id):
  120. formats = []
  121. abr = int(media.get('bitrate'))
  122. acodec = media.get('encoding')
  123. service = media.get('service')
  124. for connection in self._extract_connections(media):
  125. conn_formats = self._extract_connection(connection, programme_id)
  126. for format in conn_formats:
  127. format.update({
  128. 'format_id': '%s_%s' % (service, format['format_id']),
  129. 'abr': abr,
  130. 'acodec': acodec,
  131. })
  132. formats.extend(conn_formats)
  133. return formats
  134. def _extract_captions(self, media, programme_id):
  135. subtitles = {}
  136. for connection in self._extract_connections(media):
  137. captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
  138. lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
  139. ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
  140. srt = ''
  141. for pos, p in enumerate(ps):
  142. srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'),
  143. p.text.strip() if p.text is not None else '')
  144. subtitles[lang] = srt
  145. return subtitles
  146. def _real_extract(self, url):
  147. mobj = re.match(self._VALID_URL, url)
  148. group_id = mobj.group('id')
  149. playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
  150. 'Downloading playlist XML')
  151. no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
  152. if no_items is not None:
  153. reason = no_items.get('reason')
  154. if reason == 'preAvailability':
  155. msg = 'Episode %s is not yet available' % group_id
  156. elif reason == 'postAvailability':
  157. msg = 'Episode %s is no longer available' % group_id
  158. else:
  159. msg = 'Episode %s is not available: %s' % (group_id, reason)
  160. raise ExtractorError(msg, expected=True)
  161. formats = []
  162. subtitles = None
  163. for item in self._extract_items(playlist):
  164. kind = item.get('kind')
  165. if kind != 'programme' and kind != 'radioProgramme':
  166. continue
  167. title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
  168. description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
  169. programme_id = item.get('identifier')
  170. duration = int(item.get('duration'))
  171. media_selection = self._download_xml(
  172. 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
  173. programme_id, 'Downloading media selection XML')
  174. for media in self._extract_medias(media_selection):
  175. kind = media.get('kind')
  176. if kind == 'audio':
  177. formats.extend(self._extract_audio(media, programme_id))
  178. elif kind == 'video':
  179. formats.extend(self._extract_video(media, programme_id))
  180. elif kind == 'captions':
  181. subtitles = self._extract_captions(media, programme_id)
  182. if self._downloader.params.get('listsubtitles', False):
  183. self._list_available_subtitles(programme_id, subtitles)
  184. return
  185. self._sort_formats(formats)
  186. return {
  187. 'id': programme_id,
  188. 'title': title,
  189. 'description': description,
  190. 'duration': duration,
  191. 'formats': formats,
  192. 'subtitles': subtitles,
  193. }