You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

326 lines
12 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. from __future__ import unicode_literals
  2. import re
  3. import json
  4. import os
  5. from .common import InfoExtractor
  6. from ..compat import (
  7. compat_urlparse,
  8. compat_urllib_parse_urlencode,
  9. compat_urllib_parse_urlparse,
  10. compat_str,
  11. )
  12. from ..utils import (
  13. unified_strdate,
  14. determine_ext,
  15. int_or_none,
  16. parse_iso8601,
  17. parse_duration,
  18. )
  19. class NHLBaseInfoExtractor(InfoExtractor):
  20. @staticmethod
  21. def _fix_json(json_string):
  22. return json_string.replace('\\\'', '\'')
  23. def _real_extract_video(self, video_id):
  24. vid_parts = video_id.split(',')
  25. if len(vid_parts) == 3:
  26. video_id = '%s0%s%s-X-h' % (vid_parts[0][:4], vid_parts[1], vid_parts[2].rjust(4, '0'))
  27. json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
  28. data = self._download_json(
  29. json_url, video_id, transform_source=self._fix_json)
  30. return self._extract_video(data[0])
  31. def _extract_video(self, info):
  32. video_id = info['id']
  33. self.report_extraction(video_id)
  34. initial_video_url = info['publishPoint']
  35. if info['formats'] == '1':
  36. parsed_url = compat_urllib_parse_urlparse(initial_video_url)
  37. filename, ext = os.path.splitext(parsed_url.path)
  38. path = '%s_sd%s' % (filename, ext)
  39. data = compat_urllib_parse_urlencode({
  40. 'type': 'fvod',
  41. 'path': compat_urlparse.urlunparse(parsed_url[:2] + (path,) + parsed_url[3:])
  42. })
  43. path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
  44. path_doc = self._download_xml(
  45. path_url, video_id, 'Downloading final video url')
  46. video_url = path_doc.find('path').text
  47. else:
  48. video_url = initial_video_url
  49. join = compat_urlparse.urljoin
  50. ret = {
  51. 'id': video_id,
  52. 'title': info['name'],
  53. 'url': video_url,
  54. 'description': info['description'],
  55. 'duration': int(info['duration']),
  56. 'thumbnail': join(join(video_url, '/u/'), info['bigImage']),
  57. 'upload_date': unified_strdate(info['releaseDate'].split('.')[0]),
  58. }
  59. if video_url.startswith('rtmp:'):
  60. mobj = re.match(r'(?P<tc_url>rtmp://[^/]+/(?P<app>[a-z0-9/]+))/(?P<play_path>mp4:.*)', video_url)
  61. ret.update({
  62. 'tc_url': mobj.group('tc_url'),
  63. 'play_path': mobj.group('play_path'),
  64. 'app': mobj.group('app'),
  65. 'no_resume': True,
  66. })
  67. return ret
  68. class NHLVideocenterIE(NHLBaseInfoExtractor):
  69. IE_NAME = 'nhl.com:videocenter'
  70. _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console|embed)?(?:\?(?:.*?[?&])?)(?:id|hlg|playlist)=(?P<id>[-0-9a-zA-Z,]+)'
  71. _TESTS = [{
  72. 'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
  73. 'md5': 'db704a4ea09e8d3988c85e36cc892d09',
  74. 'info_dict': {
  75. 'id': '453614',
  76. 'ext': 'mp4',
  77. 'title': 'Quick clip: Weise 4-3 goal vs Flames',
  78. 'description': 'Dale Weise scores his first of the season to put the Canucks up 4-3.',
  79. 'duration': 18,
  80. 'upload_date': '20131006',
  81. },
  82. }, {
  83. 'url': 'http://video.nhl.com/videocenter/console?id=2014020024-628-h',
  84. 'md5': 'd22e82bc592f52d37d24b03531ee9696',
  85. 'info_dict': {
  86. 'id': '2014020024-628-h',
  87. 'ext': 'mp4',
  88. 'title': 'Alex Galchenyuk Goal on Ray Emery (14:40/3rd)',
  89. 'description': 'Home broadcast - Montreal Canadiens at Philadelphia Flyers - October 11, 2014',
  90. 'duration': 0,
  91. 'upload_date': '20141011',
  92. },
  93. }, {
  94. 'url': 'http://video.mapleleafs.nhl.com/videocenter/console?id=58665&catid=802',
  95. 'md5': 'c78fc64ea01777e426cfc202b746c825',
  96. 'info_dict': {
  97. 'id': '58665',
  98. 'ext': 'flv',
  99. 'title': 'Classic Game In Six - April 22, 1979',
  100. 'description': 'It was the last playoff game for the Leafs in the decade, and the last time the Leafs and Habs played in the playoffs. Great game, not a great ending.',
  101. 'duration': 400,
  102. 'upload_date': '20100129'
  103. },
  104. }, {
  105. 'url': 'http://video.flames.nhl.com/videocenter/console?id=630616',
  106. 'only_matching': True,
  107. }, {
  108. 'url': 'http://video.nhl.com/videocenter/?id=736722',
  109. 'only_matching': True,
  110. }, {
  111. 'url': 'http://video.nhl.com/videocenter/console?hlg=20142015,2,299&lang=en',
  112. 'md5': '076fcb88c255154aacbf0a7accc3f340',
  113. 'info_dict': {
  114. 'id': '2014020299-X-h',
  115. 'ext': 'mp4',
  116. 'title': 'Penguins at Islanders / Game Highlights',
  117. 'description': 'Home broadcast - Pittsburgh Penguins at New York Islanders - November 22, 2014',
  118. 'duration': 268,
  119. 'upload_date': '20141122',
  120. }
  121. }, {
  122. 'url': 'http://video.oilers.nhl.com/videocenter/console?id=691469&catid=4',
  123. 'info_dict': {
  124. 'id': '691469',
  125. 'ext': 'mp4',
  126. 'title': 'RAW | Craig MacTavish Full Press Conference',
  127. 'description': 'Oilers GM Craig MacTavish addresses the media at Rexall Place on Friday.',
  128. 'upload_date': '20141205',
  129. },
  130. 'params': {
  131. 'skip_download': True, # Requires rtmpdump
  132. }
  133. }, {
  134. 'url': 'http://video.nhl.com/videocenter/embed?playlist=836127',
  135. 'only_matching': True,
  136. }]
  137. def _real_extract(self, url):
  138. video_id = self._match_id(url)
  139. return self._real_extract_video(video_id)
  140. class NHLNewsIE(NHLBaseInfoExtractor):
  141. IE_NAME = 'nhl.com:news'
  142. IE_DESC = 'NHL news'
  143. _VALID_URL = r'https?://(?:.+?\.)?nhl\.com/(?:ice|club)/news\.html?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
  144. _TESTS = [{
  145. 'url': 'http://www.nhl.com/ice/news.htm?id=750727',
  146. 'md5': '4b3d1262e177687a3009937bd9ec0be8',
  147. 'info_dict': {
  148. 'id': '736722',
  149. 'ext': 'mp4',
  150. 'title': 'Cal Clutterbuck has been fined $2,000',
  151. 'description': 'md5:45fe547d30edab88b23e0dd0ab1ed9e6',
  152. 'duration': 37,
  153. 'upload_date': '20150128',
  154. },
  155. }, {
  156. # iframe embed
  157. 'url': 'http://sabres.nhl.com/club/news.htm?id=780189',
  158. 'md5': '9f663d1c006c90ac9fb82777d4294e12',
  159. 'info_dict': {
  160. 'id': '836127',
  161. 'ext': 'mp4',
  162. 'title': 'Morning Skate: OTT vs. BUF (9/23/15)',
  163. 'description': "Brian Duff chats with Tyler Ennis prior to Buffalo's first preseason home game.",
  164. 'duration': 93,
  165. 'upload_date': '20150923',
  166. },
  167. }]
  168. def _real_extract(self, url):
  169. news_id = self._match_id(url)
  170. webpage = self._download_webpage(url, news_id)
  171. video_id = self._search_regex(
  172. [r'pVid(\d+)', r"nlid\s*:\s*'(\d+)'",
  173. r'<iframe[^>]+src=["\']https?://video.*?\.nhl\.com/videocenter/embed\?.*\bplaylist=(\d+)'],
  174. webpage, 'video id')
  175. return self._real_extract_video(video_id)
  176. class NHLVideocenterCategoryIE(NHLBaseInfoExtractor):
  177. IE_NAME = 'nhl.com:videocenter:category'
  178. IE_DESC = 'NHL videocenter category'
  179. _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?[^(id=)]*catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
  180. _TEST = {
  181. 'url': 'http://video.canucks.nhl.com/videocenter/console?catid=999',
  182. 'info_dict': {
  183. 'id': '999',
  184. 'title': 'Highlights',
  185. },
  186. 'playlist_count': 12,
  187. }
  188. def _real_extract(self, url):
  189. mobj = re.match(self._VALID_URL, url)
  190. team = mobj.group('team')
  191. webpage = self._download_webpage(url, team)
  192. cat_id = self._search_regex(
  193. [r'var defaultCatId = "(.+?)";',
  194. r'{statusIndex:0,index:0,.*?id:(.*?),'],
  195. webpage, 'category id')
  196. playlist_title = self._html_search_regex(
  197. r'tab0"[^>]*?>(.*?)</td>',
  198. webpage, 'playlist title', flags=re.DOTALL).lower().capitalize()
  199. data = compat_urllib_parse_urlencode({
  200. 'cid': cat_id,
  201. # This is the default value
  202. 'count': 12,
  203. 'ptrs': 3,
  204. 'format': 'json',
  205. })
  206. path = '/videocenter/servlets/browse?' + data
  207. request_url = compat_urlparse.urljoin(url, path)
  208. response = self._download_webpage(request_url, playlist_title)
  209. response = self._fix_json(response)
  210. if not response.strip():
  211. self._downloader.report_warning('Got an empty response, trying '
  212. 'adding the "newvideos" parameter')
  213. response = self._download_webpage(request_url + '&newvideos=true',
  214. playlist_title)
  215. response = self._fix_json(response)
  216. videos = json.loads(response)
  217. return {
  218. '_type': 'playlist',
  219. 'title': playlist_title,
  220. 'id': cat_id,
  221. 'entries': [self._extract_video(v) for v in videos],
  222. }
  223. class NHLIE(InfoExtractor):
  224. IE_NAME = 'nhl.com'
  225. _VALID_URL = r'https?://(?:www\.)?nhl\.com/([^/]+/)*c-(?P<id>\d+)'
  226. _TESTS = [{
  227. # type=video
  228. 'url': 'https://www.nhl.com/video/anisimov-cleans-up-mess/t-277752844/c-43663503',
  229. 'md5': '0f7b9a8f986fb4b4eeeece9a56416eaf',
  230. 'info_dict': {
  231. 'id': '43663503',
  232. 'ext': 'mp4',
  233. 'title': 'Anisimov cleans up mess',
  234. 'description': 'md5:a02354acdfe900e940ce40706939ca63',
  235. 'timestamp': 1461288600,
  236. 'upload_date': '20160422',
  237. },
  238. }, {
  239. # type=article
  240. 'url': 'https://www.nhl.com/news/dennis-wideman-suspended/c-278258934',
  241. 'md5': '1f39f4ea74c1394dea110699a25b366c',
  242. 'info_dict': {
  243. 'id': '40784403',
  244. 'ext': 'mp4',
  245. 'title': 'Wideman suspended by NHL',
  246. 'description': 'Flames defenseman Dennis Wideman was banned 20 games for violation of Rule 40 (Physical Abuse of Officials)',
  247. 'upload_date': '20160204',
  248. 'timestamp': 1454544904,
  249. },
  250. }]
  251. def _real_extract(self, url):
  252. tmp_id = self._match_id(url)
  253. video_data = self._download_json(
  254. 'https://nhl.bamcontent.com/nhl/id/v1/%s/details/web-v1.json' % tmp_id,
  255. tmp_id)
  256. if video_data.get('type') == 'article':
  257. video_data = video_data['media']
  258. video_id = compat_str(video_data['id'])
  259. title = video_data['title']
  260. formats = []
  261. for playback in video_data.get('playbacks', []):
  262. playback_url = playback.get('url')
  263. if not playback_url:
  264. continue
  265. ext = determine_ext(playback_url)
  266. if ext == 'm3u8':
  267. formats.extend(self._extract_m3u8_formats(
  268. playback_url, video_id, 'mp4', 'm3u8_native',
  269. m3u8_id=playback.get('name', 'hls'), fatal=False))
  270. else:
  271. height = int_or_none(playback.get('height'))
  272. formats.append({
  273. 'format_id': playback.get('name', 'http' + ('-%dp' % height if height else '')),
  274. 'url': playback_url,
  275. 'width': int_or_none(playback.get('width')),
  276. 'height': height,
  277. })
  278. self._sort_formats(formats, ('preference', 'width', 'height', 'tbr', 'format_id'))
  279. thumbnails = []
  280. for thumbnail_id, thumbnail_data in video_data.get('image', {}).get('cuts', {}).items():
  281. thumbnail_url = thumbnail_data.get('src')
  282. if not thumbnail_url:
  283. continue
  284. thumbnails.append({
  285. 'id': thumbnail_id,
  286. 'url': thumbnail_url,
  287. 'width': int_or_none(thumbnail_data.get('width')),
  288. 'height': int_or_none(thumbnail_data.get('height')),
  289. })
  290. return {
  291. 'id': video_id,
  292. 'title': title,
  293. 'description': video_data.get('description'),
  294. 'timestamp': parse_iso8601(video_data.get('date')),
  295. 'duration': parse_duration(video_data.get('duration')),
  296. 'thumbnails': thumbnails,
  297. 'formats': formats,
  298. }