You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

159 lines
6.2 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from .nexx import (
  6. NexxIE,
  7. NexxEmbedIE,
  8. )
  9. from .spiegeltv import SpiegeltvIE
  10. from ..compat import compat_urlparse
  11. from ..utils import (
  12. parse_duration,
  13. strip_or_none,
  14. unified_timestamp,
  15. )
  16. class SpiegelIE(InfoExtractor):
  17. _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed|-iframe)?(?:\.html)?(?:#.*)?$'
  18. _TESTS = [{
  19. 'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
  20. 'md5': 'b57399839d055fccfeb9a0455c439868',
  21. 'info_dict': {
  22. 'id': '563747',
  23. 'ext': 'mp4',
  24. 'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv',
  25. 'description': 'md5:8029d8310232196eb235d27575a8b9f4',
  26. 'duration': 49,
  27. 'upload_date': '20130311',
  28. 'timestamp': 1362994320,
  29. },
  30. }, {
  31. 'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
  32. 'md5': '5b6c2f4add9d62912ed5fc78a1faed80',
  33. 'info_dict': {
  34. 'id': '580988',
  35. 'ext': 'mp4',
  36. 'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
  37. 'description': 'md5:c2322b65e58f385a820c10fa03b2d088',
  38. 'duration': 983,
  39. 'upload_date': '20131115',
  40. 'timestamp': 1384546642,
  41. },
  42. }, {
  43. 'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html',
  44. 'md5': '97b91083a672d72976faa8433430afb9',
  45. 'info_dict': {
  46. 'id': '601883',
  47. 'ext': 'mp4',
  48. 'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.',
  49. 'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"',
  50. 'upload_date': '20140904',
  51. 'timestamp': 1409834160,
  52. }
  53. }, {
  54. 'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-iframe.html',
  55. 'only_matching': True,
  56. }, {
  57. # nexx video
  58. 'url': 'http://www.spiegel.de/video/spiegel-tv-magazin-ueber-guellekrise-in-schleswig-holstein-video-99012776.html',
  59. 'only_matching': True,
  60. }]
  61. def _real_extract(self, url):
  62. video_id = self._match_id(url)
  63. metadata_url = 'http://www.spiegel.de/video/metadata/video-%s.json' % video_id
  64. handle = self._request_webpage(metadata_url, video_id)
  65. # 302 to spiegel.tv, like http://www.spiegel.de/video/der-film-zum-wochenende-die-wahrheit-ueber-maenner-video-99003272.html
  66. if SpiegeltvIE.suitable(handle.geturl()):
  67. return self.url_result(handle.geturl(), 'Spiegeltv')
  68. video_data = self._parse_json(self._webpage_read_content(
  69. handle, metadata_url, video_id), video_id)
  70. title = video_data['title']
  71. nexx_id = video_data['nexxOmniaId']
  72. domain_id = video_data.get('nexxOmniaDomain') or '748'
  73. return {
  74. '_type': 'url_transparent',
  75. 'id': video_id,
  76. 'url': 'nexx:%s:%s' % (domain_id, nexx_id),
  77. 'title': title,
  78. 'description': strip_or_none(video_data.get('teaser')),
  79. 'duration': parse_duration(video_data.get('duration')),
  80. 'timestamp': unified_timestamp(video_data.get('datum')),
  81. 'ie_key': NexxIE.ie_key(),
  82. }
  83. class SpiegelArticleIE(InfoExtractor):
  84. _VALID_URL = r'https?://(?:www\.)?spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html'
  85. IE_NAME = 'Spiegel:Article'
  86. IE_DESC = 'Articles on spiegel.de'
  87. _TESTS = [{
  88. 'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html',
  89. 'info_dict': {
  90. 'id': '1516455',
  91. 'ext': 'mp4',
  92. 'title': 'Faszination Badminton: Nennt es bloß nicht Federball',
  93. 'description': 're:^Patrick Kämnitz gehört.{100,}',
  94. 'upload_date': '20140825',
  95. },
  96. }, {
  97. 'url': 'http://www.spiegel.de/wissenschaft/weltall/astronaut-alexander-gerst-antwortet-spiegel-online-lesern-a-989876.html',
  98. 'info_dict': {
  99. },
  100. 'playlist_count': 6,
  101. }, {
  102. # Nexx iFrame embed
  103. 'url': 'http://www.spiegel.de/sptv/spiegeltv/spiegel-tv-ueber-schnellste-katapult-achterbahn-der-welt-taron-a-1137884.html',
  104. 'info_dict': {
  105. 'id': '161464',
  106. 'ext': 'mp4',
  107. 'title': 'Nervenkitzel Achterbahn',
  108. 'alt_title': 'Karussellbauer in Deutschland',
  109. 'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc',
  110. 'release_year': 2005,
  111. 'creator': 'SPIEGEL TV',
  112. 'thumbnail': r're:^https?://.*\.jpg$',
  113. 'duration': 2761,
  114. 'timestamp': 1394021479,
  115. 'upload_date': '20140305',
  116. },
  117. 'params': {
  118. 'format': 'bestvideo',
  119. 'skip_download': True,
  120. },
  121. }]
  122. def _real_extract(self, url):
  123. video_id = self._match_id(url)
  124. webpage = self._download_webpage(url, video_id)
  125. # Single video on top of the page
  126. video_link = self._search_regex(
  127. r'<a href="([^"]+)" onclick="return spOpenVideo\(this,', webpage,
  128. 'video page URL', default=None)
  129. if video_link:
  130. video_url = compat_urlparse.urljoin(
  131. self.http_scheme() + '//spiegel.de/', video_link)
  132. return self.url_result(video_url)
  133. # Multiple embedded videos
  134. embeds = re.findall(
  135. r'<div class="vid_holder[0-9]+.*?</div>\s*.*?url\s*=\s*"([^"]+)"',
  136. webpage)
  137. entries = [
  138. self.url_result(compat_urlparse.urljoin(
  139. self.http_scheme() + '//spiegel.de/', embed_path))
  140. for embed_path in embeds]
  141. if embeds:
  142. return self.playlist_result(entries)
  143. return self.playlist_from_matches(
  144. NexxEmbedIE._extract_urls(webpage), ie=NexxEmbedIE.ie_key())