You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

111 lines
4.0 KiB

  1. import json
  2. import netrc
  3. import re
  4. import socket
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. compat_http_client,
  8. compat_str,
  9. compat_urllib_error,
  10. compat_urllib_parse,
  11. compat_urllib_request,
  12. ExtractorError,
  13. )
  class FacebookIE(InfoExtractor):
      """Information Extractor for Facebook.

      Handles ``video/video.php`` and ``photo.php`` URLs carrying a numeric
      ``v=`` parameter.  Before extraction it optionally logs in with
      credentials taken from the downloader parameters or from .netrc.
      """

      _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
      _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
      _NETRC_MACHINE = 'facebook'
      IE_NAME = u'facebook'

      def report_login(self):
          """Report attempt to log in."""
          self.to_screen(u'Logging in')

      def _real_initialize(self):
          """Log in to Facebook when credentials are available.

          Credentials are read from the downloader's ``username`` and
          ``password`` parameters, or, when ``usenetrc`` is set, from the
          ``facebook`` machine entry of the user's .netrc file.  When there is
          no downloader or no credentials, the method returns silently.  A
          .netrc problem, a network error or a rejected login is reported
          through ``report_warning`` and the method returns without raising.
          """
          if self._downloader is None:
              return

          useremail = None
          password = None
          downloader_params = self._downloader.params

          # Attempt to use provided username and password or .netrc data
          if downloader_params.get('username') is not None:
              useremail = downloader_params['username']
              password = downloader_params['password']
          elif downloader_params.get('usenetrc', False):
              try:
                  info = netrc.netrc().authenticators(self._NETRC_MACHINE)
                  if info is None:
                      raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
                  # authenticators() yields (login, account, password)
                  useremail, password = info[0], info[2]
              except (IOError, netrc.NetrcParseError) as err:
                  self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
                  return

          if useremail is None:
              return

          # Log in
          login_form = {
              'email': useremail,
              'pass': password,
              'login': 'Log+In'
          }
          # urlencode() returns text, but POST data has to be bytes on Python 3;
          # the encoded form is pure ASCII, so this is a no-op on Python 2.
          post_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
          request = compat_urllib_request.Request(self._LOGIN_URL, post_data)
          try:
              self.report_login()
              login_results = compat_urllib_request.urlopen(request).read()
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
              self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
              return

          # read() returns bytes; decode so the text pattern below can be
          # applied on Python 3 as well.
          login_page = login_results.decode('utf-8', 'replace')
          # Being served the login form again means the login was rejected.
          if re.search(r'<form(.*)name="login"(.*)</form>', login_page) is not None:
              self._downloader.report_warning(u'unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')

      def _real_extract(self, url):
          """Extract the video described by ``url``.

          Returns a one-element list holding a dict with the keys ``id``,
          ``title``, ``url``, ``ext``, ``duration`` and ``thumbnail``.

          Raises ExtractorError when the URL does not match ``_VALID_URL``,
          when the player data cannot be located in the page, or when neither
          an HD nor an SD source URL is present.  Other missing keys in the
          embedded data (``params``, ``video_data``, ``video_duration``,
          ``thumbnail_src``) still surface as KeyError/IndexError.
          """
          mobj = re.match(self._VALID_URL, url)
          if mobj is None:
              raise ExtractorError(u'Invalid URL: %s' % url)
          video_id = mobj.group('ID')

          # Always fetch the canonical video page, whatever URL form was given.
          url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
          webpage = self._download_webpage(url, video_id)

          # The player variables are a JSON literal sitting between these two
          # JavaScript fragments.
          BEFORE = '{swf.addParam(param[0], param[1]);});\n'
          AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
          m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
          if not m:
              raise ExtractorError(u'Cannot parse data')

          # The literal is converted with dict(), i.e. it is consumed as a
          # sequence of [name, value] pairs.
          data = dict(json.loads(m.group(1)))
          # 'params' is itself URL-quoted JSON.
          params_raw = compat_urllib_parse.unquote(data['params'])
          params = json.loads(params_raw)
          video_data = params['video_data'][0]

          # Prefer the HD source and fall back to SD.  Both are looked up with
          # get() so a missing key reaches the ExtractorError below instead of
          # escaping as a bare KeyError.
          video_url = video_data.get('hd_src') or video_data.get('sd_src')
          if not video_url:
              raise ExtractorError(u'Cannot find video URL')

          video_duration = int(video_data['video_duration'])
          thumbnail = video_data['thumbnail_src']

          video_title = self._html_search_regex('<h2 class="uiHeaderTitle">([^<]+)</h2>',
              webpage, u'title')

          info = {
              'id': video_id,
              'title': video_title,
              'url': video_url,
              'ext': 'mp4',
              'duration': video_duration,
              'thumbnail': thumbnail,
          }
          return [info]