You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

66 lines
2.6 KiB

  1. import json
  2. import re
  3. import time
  4. from .common import InfoExtractor
  5. from ..utils import month_by_name
  6. class NDTVIE(InfoExtractor):
  7. _VALID_URL = r'^https?://(?:www\.)?ndtv\.com/video/player/[^/]*/[^/]*/(?P<id>[a-z0-9]+)'
  8. _TEST = {
  9. u"url": u"http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710",
  10. u"file": u"300710.mp4",
  11. u"md5": u"39f992dbe5fb531c395d8bbedb1e5e88",
  12. u"info_dict": {
  13. u"title": u"NDTV exclusive: Don't need character certificate from Rahul Gandhi, says Arvind Kejriwal",
  14. u"description": u"In an exclusive interview to NDTV, Aam Aadmi Party's Arvind Kejriwal says it makes no difference to him that Rahul Gandhi said the Congress needs to learn from his party.",
  15. u"upload_date": u"20131208",
  16. u"duration": 1327,
  17. u"thumbnail": u"http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg",
  18. },
  19. }
  20. def _real_extract(self, url):
  21. mobj = re.match(self._VALID_URL, url)
  22. video_id = mobj.group('id')
  23. webpage = self._download_webpage(url, video_id)
  24. filename = self._search_regex(
  25. r"__filename='([^']+)'", webpage, u'video filename')
  26. video_url = (u'http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' %
  27. filename)
  28. duration_str = filename = self._search_regex(
  29. r"__duration='([^']+)'", webpage, u'duration', fatal=False)
  30. duration = None if duration_str is None else int(duration_str)
  31. date_m = re.search(r'''(?x)
  32. <p\s+class="vod_dateline">\s*
  33. Published\s+On:\s*
  34. (?P<monthname>[A-Za-z]+)\s+(?P<day>[0-9]+),\s*(?P<year>[0-9]+)
  35. ''', webpage)
  36. upload_date = None
  37. assert date_m
  38. if date_m is not None:
  39. month = month_by_name(date_m.group('monthname'))
  40. if month is not None:
  41. upload_date = '%s%02d%02d' % (
  42. date_m.group('year'), month, int(date_m.group('day')))
  43. description = self._og_search_description(webpage)
  44. READ_MORE = u' (Read more)'
  45. if description.endswith(READ_MORE):
  46. description = description[:-len(READ_MORE)]
  47. return {
  48. 'id': video_id,
  49. 'url': video_url,
  50. 'title': self._og_search_title(webpage),
  51. 'description': description,
  52. 'thumbnail': self._og_search_thumbnail(webpage),
  53. 'duration': duration,
  54. 'upload_date': upload_date,
  55. }