You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

86 lines
3.0 KiB

  1. import re
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. compat_HTTPError,
  5. compat_str,
  6. compat_urllib_parse,
  7. compat_urllib_parse_urlparse,
  8. ExtractorError,
  9. )
  10. class AddAnimeIE(InfoExtractor):
  11. _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
  12. IE_NAME = u'AddAnime'
  13. _TEST = {
  14. u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
  15. u'file': u'24MR3YO5SAS9.mp4',
  16. u'md5': u'72954ea10bc979ab5e2eb288b21425a0',
  17. u'info_dict': {
  18. u"description": u"One Piece 606",
  19. u"title": u"One Piece 606"
  20. }
  21. }
  22. def _real_extract(self, url):
  23. try:
  24. mobj = re.match(self._VALID_URL, url)
  25. video_id = mobj.group('video_id')
  26. webpage = self._download_webpage(url, video_id)
  27. except ExtractorError as ee:
  28. if not isinstance(ee.cause, compat_HTTPError) or \
  29. ee.cause.code != 503:
  30. raise
  31. redir_webpage = ee.cause.read().decode('utf-8')
  32. action = self._search_regex(
  33. r'<form id="challenge-form" action="([^"]+)"',
  34. redir_webpage, u'Redirect form')
  35. vc = self._search_regex(
  36. r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>',
  37. redir_webpage, u'redirect vc value')
  38. av = re.search(
  39. r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
  40. redir_webpage)
  41. if av is None:
  42. raise ExtractorError(u'Cannot find redirect math task')
  43. av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3))
  44. parsed_url = compat_urllib_parse_urlparse(url)
  45. av_val = av_res + len(parsed_url.netloc)
  46. confirm_url = (
  47. parsed_url.scheme + u'://' + parsed_url.netloc +
  48. action + '?' +
  49. compat_urllib_parse.urlencode({
  50. 'jschl_vc': vc, 'jschl_answer': compat_str(av_val)}))
  51. self._download_webpage(
  52. confirm_url, video_id,
  53. note=u'Confirming after redirect')
  54. webpage = self._download_webpage(url, video_id)
  55. formats = []
  56. for format_id in ('normal', 'hq'):
  57. rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id)
  58. video_url = self._search_regex(rex, webpage, u'video file URLx',
  59. fatal=False)
  60. if not video_url:
  61. continue
  62. formats.append({
  63. 'format_id': format_id,
  64. 'url': video_url,
  65. })
  66. if not formats:
  67. raise ExtractorError(u'Cannot find any video format!')
  68. video_title = self._og_search_title(webpage)
  69. video_description = self._og_search_description(webpage)
  70. return {
  71. '_type': 'video',
  72. 'id': video_id,
  73. 'formats': formats,
  74. 'title': video_title,
  75. 'description': video_description
  76. }