You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

59 lines
1.8 KiB

  1. #!/usr/bin/env python
  2. """
  3. This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check
  4. if we are not 'age_limit' tagging some porn site
  5. A second approach implemented relies on a list of porn domains, to activate it
  6. pass the list filename as the only argument
  7. """
  8. # Allow direct execution
  9. import os
  10. import sys
  11. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  12. from test.helper import get_testcases
  13. from youtube_dl.utils import compat_urllib_parse_urlparse
  14. from youtube_dl.utils import compat_urllib_request
  15. if len(sys.argv) > 1:
  16. METHOD = 'LIST'
  17. LIST = open(sys.argv[1]).read().decode('utf8').strip()
  18. else:
  19. METHOD = 'EURISTIC'
  20. for test in get_testcases():
  21. if METHOD == 'EURISTIC':
  22. try:
  23. webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
  24. except:
  25. print('\nFail: {0}'.format(test['name']))
  26. continue
  27. webpage = webpage.decode('utf8', 'replace')
  28. RESULT = 'porn' in webpage.lower()
  29. elif METHOD == 'LIST':
  30. domain = compat_urllib_parse_urlparse(test['url']).netloc
  31. if not domain:
  32. print('\nFail: {0}'.format(test['name']))
  33. continue
  34. domain = '.'.join(domain.split('.')[-2:])
  35. RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST)
  36. if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict']
  37. or test['info_dict']['age_limit'] != 18):
  38. print('\nPotential missing age_limit check: {0}'.format(test['name']))
  39. elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict']
  40. and test['info_dict']['age_limit'] == 18):
  41. print('\nPotential false negative: {0}'.format(test['name']))
  42. else:
  43. sys.stdout.write('.')
  44. sys.stdout.flush()
  45. print()