You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

82 lines
2.8 KiB

  1. import datetime
  2. import re
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. ExtractorError,
  6. )
  7. class GooglePlusIE(InfoExtractor):
  8. """Information extractor for plus.google.com."""
  9. _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
  10. IE_NAME = u'plus.google'
  11. def _real_extract(self, url):
  12. # Extract id from URL
  13. mobj = re.match(self._VALID_URL, url)
  14. if mobj is None:
  15. raise ExtractorError(u'Invalid URL: %s' % url)
  16. post_url = mobj.group(0)
  17. video_id = mobj.group(1)
  18. video_extension = 'flv'
  19. # Step 1, Retrieve post webpage to extract further information
  20. webpage = self._download_webpage(post_url, video_id, u'Downloading entry webpage')
  21. self.report_extraction(video_id)
  22. # Extract update date
  23. upload_date = self._html_search_regex('title="Timestamp">(.*?)</a>',
  24. webpage, u'upload date', fatal=False)
  25. if upload_date:
  26. # Convert timestring to a format suitable for filename
  27. upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d")
  28. upload_date = upload_date.strftime('%Y%m%d')
  29. # Extract uploader
  30. uploader = self._html_search_regex(r'rel\="author".*?>(.*?)</a>',
  31. webpage, u'uploader', fatal=False)
  32. # Extract title
  33. # Get the first line for title
  34. video_title = self._html_search_regex(r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]',
  35. webpage, 'title', default=u'NA')
  36. # Step 2, Stimulate clicking the image box to launch video
  37. video_page = self._search_regex('"(https\://plus\.google\.com/photos/.*?)",,"image/jpeg","video"\]',
  38. webpage, u'video page URL')
  39. webpage = self._download_webpage(video_page, video_id, u'Downloading video page')
  40. # Extract video links on video page
  41. """Extract video links of all sizes"""
  42. pattern = '\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"'
  43. mobj = re.findall(pattern, webpage)
  44. if len(mobj) == 0:
  45. raise ExtractorError(u'Unable to extract video links')
  46. # Sort in resolution
  47. links = sorted(mobj)
  48. # Choose the lowest of the sort, i.e. highest resolution
  49. video_url = links[-1]
  50. # Only get the url. The resolution part in the tuple has no use anymore
  51. video_url = video_url[-1]
  52. # Treat escaped \u0026 style hex
  53. try:
  54. video_url = video_url.decode("unicode_escape")
  55. except AttributeError: # Python 3
  56. video_url = bytes(video_url, 'ascii').decode('unicode-escape')
  57. return [{
  58. 'id': video_id,
  59. 'url': video_url,
  60. 'uploader': uploader,
  61. 'upload_date': upload_date,
  62. 'title': video_title,
  63. 'ext': video_extension,
  64. }]