You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

62 lines
1.9 KiB

  1. from __future__ import unicode_literals
  2. import re
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. unified_strdate,
  6. )
  7. class NPOIE(InfoExtractor):
  8. IE_NAME = 'npo.nl'
  9. _VALID_URL = r'https?://www\.npo\.nl/[^/]+/[^/]+/(?P<id>[^/?]+)'
  10. _TEST = {
  11. 'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719',
  12. 'md5': '4b3f9c429157ec4775f2c9cb7b911016',
  13. 'info_dict': {
  14. 'id': 'VPWON_1220719',
  15. 'ext': 'mp4',
  16. 'title': 'Nieuwsuur',
  17. 'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
  18. 'upload_date': '20140622',
  19. },
  20. }
  21. def _real_extract(self, url):
  22. mobj = re.match(self._VALID_URL, url)
  23. video_id = mobj.group('id')
  24. metadata = self._download_json(
  25. 'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
  26. video_id,
  27. # We have to remove the javascript callback
  28. transform_source=lambda j: re.sub(r'parseMetadata\((.*?)\);\n//.*$', r'\1', j)
  29. )
  30. token_page = self._download_webpage(
  31. 'http://ida.omroep.nl/npoplayer/i.js',
  32. video_id,
  33. note='Downloading token'
  34. )
  35. token = self._search_regex(r'npoplayer.token = "(.+?)"', token_page, 'token')
  36. streams_info = self._download_json(
  37. 'http://ida.omroep.nl/odi/?prid=%s&puboptions=h264_std&adaptive=yes&token=%s' % (video_id, token),
  38. video_id
  39. )
  40. stream_info = self._download_json(
  41. streams_info['streams'][0] + '&type=json',
  42. video_id,
  43. 'Downloading stream info'
  44. )
  45. return {
  46. 'id': video_id,
  47. 'title': metadata['titel'],
  48. 'ext': 'mp4',
  49. 'url': stream_info['url'],
  50. 'description': metadata['info'],
  51. 'thumbnail': metadata['images'][-1]['url'],
  52. 'upload_date': unified_strdate(metadata['gidsdatum']),
  53. }