You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

62 lines
2.4 KiB

  1. # encoding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import unified_strdate
  6. class LifeNewsIE(InfoExtractor):
  7. IE_NAME = 'lifenews'
  8. IE_DESC = 'LIFE | NEWS'
  9. _VALID_URL = r'http://lifenews\.ru/(?:mobile/)?news/(?P<id>\d+)'
  10. _TEST = {
  11. 'url': 'http://lifenews.ru/news/126342',
  12. 'file': '126342.mp4',
  13. 'md5': 'e1b50a5c5fb98a6a544250f2e0db570a',
  14. 'info_dict': {
  15. 'title': 'МВД разыскивает троих мужчин, оставивших в IKEA сумку с автоматом',
  16. 'description': 'Камеры наблюдения гипермаркета зафиксировали троих мужчин, спрятавших оружейный арсенал в камере хранения.',
  17. 'thumbnail': 'http://lifenews.ru/static/posts/2014/1/126342/.video.jpg',
  18. 'upload_date': '20140130',
  19. }
  20. }
  21. def _real_extract(self, url):
  22. mobj = re.match(self._VALID_URL, url)
  23. video_id = mobj.group('id')
  24. webpage = self._download_webpage('http://lifenews.ru/mobile/news/%s' % video_id, video_id, 'Downloading page')
  25. video_url = self._html_search_regex(
  26. r'<video.*?src="([^"]+)"></video>', webpage, 'video URL')
  27. thumbnail = self._html_search_regex(
  28. r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail')
  29. title = self._og_search_title(webpage)
  30. TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS'
  31. if title.endswith(TITLE_SUFFIX):
  32. title = title[:-len(TITLE_SUFFIX)]
  33. description = self._og_search_description(webpage)
  34. view_count = self._html_search_regex(
  35. r'<div class=\'views\'>(\d+)</div>', webpage, 'view count')
  36. comment_count = self._html_search_regex(
  37. r'<div class=\'comments\'>(\d+)</div>', webpage, 'comment count')
  38. upload_date = self._html_search_regex(
  39. r'<time datetime=\'([^\']+)\'>', webpage, 'upload date')
  40. return {
  41. 'id': video_id,
  42. 'url': video_url,
  43. 'thumbnail': thumbnail,
  44. 'title': title,
  45. 'description': description,
  46. 'view_count': view_count,
  47. 'comment_count': comment_count,
  48. 'upload_date': unified_strdate(upload_date),
  49. }