You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

42 lines
1.6 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from .jwplatform import JWPlatformIE
  class BusinessInsiderIE(InfoExtractor):
      """Extractor for Business Insider pages that embed a JW Platform player.

      The page itself is only scanned for the 8-character JW Platform media
      id; the result is then handed over to ``JWPlatformIE``.
      """

      # Any businessinsider.com / businessinsider.nl host (an optional
      # subdomain prefix is allowed); the last path segment is captured
      # as the ``id`` group.
      _VALID_URL = r'https?://(?:[^/]+\.)?businessinsider\.(?:com|nl)/(?:[^/]+/)*(?P<id>[^/?#&]+)'

      _TESTS = [
          {
              'url': 'http://uk.businessinsider.com/how-much-radiation-youre-exposed-to-in-everyday-life-2016-6',
              'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
              'info_dict': {
                  'id': 'hZRllCfw',
                  'ext': 'mp4',
                  'title': "Here's how much radiation you're exposed to in everyday life",
                  'description': 'md5:9a0d6e2c279948aadaa5e84d6d9b99bd',
                  'upload_date': '20170709',
                  'timestamp': 1499606400,
              },
              'params': {
                  'skip_download': True,
              },
          },
          {
              'url': 'https://www.businessinsider.nl/5-scientifically-proven-things-make-you-less-attractive-2017-7/',
              'only_matching': True,
          },
          {
              'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
              'only_matching': True,
          },
      ]

      def _real_extract(self, url):
          """Find the JW Platform media id on the page and delegate to it.

          Returns the ``url_result`` for ``jwplatform:<media id>``, tagged
          with the identifier matched from ``url``.
          """
          page_slug = self._match_id(url)
          html = self._download_webpage(url, page_slug)

          # Candidate patterns for the embedded media id; each captures
          # exactly eight alphanumeric characters.
          media_id_patterns = (
              r'data-media-id=["\']([a-zA-Z0-9]{8})',
              r'id=["\']jwplayer_([a-zA-Z0-9]{8})',
              r'id["\']?\s*:\s*["\']?([a-zA-Z0-9]{8})',
          )
          media_id = self._search_regex(
              media_id_patterns, html, 'jwplatform id')

          jwplatform_url = 'jwplatform:%s' % media_id
          return self.url_result(
              jwplatform_url, ie=JWPlatformIE.ie_key(), video_id=page_slug)