You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

76 lines
2.5 KiB

  1. from __future__ import unicode_literals
  2. import re
  3. from .common import InfoExtractor
  4. from ..utils import parse_iso8601
  5. class NYTimesIE(InfoExtractor):
  6. _VALID_URL = r'https?://(?:www\.)?nytimes\.com/video/(?:[^/]+/)+(?P<id>\d+)'
  7. _TEST = {
  8. 'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
  9. 'md5': '18a525a510f942ada2720db5f31644c0',
  10. 'info_dict': {
  11. 'id': '100000002847155',
  12. 'ext': 'mov',
  13. 'title': 'Verbatim: What Is a Photocopier?',
  14. 'description': 'md5:93603dada88ddbda9395632fdc5da260',
  15. 'timestamp': 1398631707,
  16. 'upload_date': '20140427',
  17. 'uploader': 'Brett Weiner',
  18. 'duration': 419,
  19. }
  20. }
  21. def _real_extract(self, url):
  22. mobj = re.match(self._VALID_URL, url)
  23. video_id = mobj.group('id')
  24. video_data = self._download_json(
  25. 'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id, video_id, 'Downloading video JSON')
  26. title = video_data['headline']
  27. description = video_data['summary']
  28. duration = video_data['duration'] / 1000.0
  29. uploader = video_data['byline']
  30. timestamp = parse_iso8601(video_data['publication_date'][:-8])
  31. def get_file_size(file_size):
  32. if isinstance(file_size, int):
  33. return file_size
  34. elif isinstance(file_size, dict):
  35. return int(file_size.get('value', 0))
  36. else:
  37. return 0
  38. formats = [
  39. {
  40. 'url': video['url'],
  41. 'format_id': video['type'],
  42. 'vcodec': video['video_codec'],
  43. 'width': video['width'],
  44. 'height': video['height'],
  45. 'filesize': get_file_size(video['fileSize']),
  46. } for video in video_data['renditions']
  47. ]
  48. self._sort_formats(formats)
  49. thumbnails = [
  50. {
  51. 'url': 'http://www.nytimes.com/%s' % image['url'],
  52. 'resolution': '%dx%d' % (image['width'], image['height']),
  53. } for image in video_data['images']
  54. ]
  55. return {
  56. 'id': video_id,
  57. 'title': title,
  58. 'description': description,
  59. 'timestamp': timestamp,
  60. 'uploader': uploader,
  61. 'duration': duration,
  62. 'formats': formats,
  63. 'thumbnails': thumbnails,
  64. }