You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

70 lines
2.4 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. determine_ext,
  7. ExtractorError,
  8. urlencode_postdata,
  9. )
  10. class PromptFileIE(InfoExtractor):
  11. _VALID_URL = r'https?://(?:www\.)?promptfile\.com/l/(?P<id>[0-9A-Z\-]+)'
  12. _TEST = {
  13. 'url': 'http://www.promptfile.com/l/86D1CE8462-576CAAE416',
  14. 'md5': '5a7e285a26e0d66d9a263fae91bc92ce',
  15. 'info_dict': {
  16. 'id': '86D1CE8462-576CAAE416',
  17. 'ext': 'mp4',
  18. 'title': 'oceans.mp4',
  19. 'thumbnail': 're:^https?://.*\.jpg$',
  20. }
  21. }
  22. def _real_extract(self, url):
  23. video_id = self._match_id(url)
  24. webpage = self._download_webpage(url, video_id)
  25. if re.search(r'<div.+id="not_found_msg".+>(?!We are).+</div>[^-]', webpage) is not None:
  26. raise ExtractorError('Video %s does not exist' % video_id,
  27. expected=True)
  28. chash = self._search_regex(
  29. r'val\("([^"]*)"\s*\+\s*\$\("#chash"\)', webpage, 'chash')
  30. fields = self._hidden_inputs(webpage)
  31. keys = list(fields.keys())
  32. chash_key = keys[0] if len(keys) == 1 else next(
  33. key for key in keys if key.startswith('cha'))
  34. fields[chash_key] = chash + fields[chash_key]
  35. webpage = self._download_webpage(
  36. url, video_id, 'Downloading video page',
  37. data=urlencode_postdata(fields),
  38. headers={'Content-type': 'application/x-www-form-urlencoded'})
  39. video_url = self._search_regex(
  40. (r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>\s*Download File',
  41. r'<a[^>]+href=(["\'])(?P<url>https?://(?:www\.)?promptfile\.com/file/(?:(?!\1).)+)\1'),
  42. webpage, 'video url', group='url')
  43. title = self._html_search_regex(
  44. r'<span.+title="([^"]+)">', webpage, 'title')
  45. thumbnail = self._html_search_regex(
  46. r'<div id="player_overlay">.*button>.*?<img src="([^"]+)"',
  47. webpage, 'thumbnail', fatal=False, flags=re.DOTALL)
  48. formats = [{
  49. 'format_id': 'sd',
  50. 'url': video_url,
  51. 'ext': determine_ext(title),
  52. }]
  53. self._sort_formats(formats)
  54. return {
  55. 'id': video_id,
  56. 'title': title,
  57. 'thumbnail': thumbnail,
  58. 'formats': formats,
  59. }