You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

93 lines
3.4 KiB

10 years ago
  1. from __future__ import unicode_literals
  2. import re
  3. from .common import InfoExtractor
  4. from ..compat import (
  5. compat_urlparse,
  6. compat_urllib_parse,
  7. )
  8. from ..utils import (
  9. xpath_with_ns,
  10. )
  11. class InternetVideoArchiveIE(InfoExtractor):
  12. _VALID_URL = r'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?'
  13. _TEST = {
  14. 'url': 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247',
  15. 'info_dict': {
  16. 'id': '452693',
  17. 'ext': 'mp4',
  18. 'title': 'SKYFALL',
  19. 'description': 'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',
  20. 'duration': 152,
  21. },
  22. }
  23. @staticmethod
  24. def _build_url(query):
  25. return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query
  26. @staticmethod
  27. def _clean_query(query):
  28. NEEDED_ARGS = ['publishedid', 'customerid']
  29. query_dic = compat_urlparse.parse_qs(query)
  30. cleaned_dic = dict((k, v[0]) for (k, v) in query_dic.items() if k in NEEDED_ARGS)
  31. # Other player ids return m3u8 urls
  32. cleaned_dic['playerid'] = '247'
  33. cleaned_dic['videokbrate'] = '100000'
  34. return compat_urllib_parse.urlencode(cleaned_dic)
  35. def _real_extract(self, url):
  36. query = compat_urlparse.urlparse(url).query
  37. query_dic = compat_urlparse.parse_qs(query)
  38. video_id = query_dic['publishedid'][0]
  39. url = self._build_url(query)
  40. flashconfiguration = self._download_xml(url, video_id,
  41. 'Downloading flash configuration')
  42. file_url = flashconfiguration.find('file').text
  43. file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
  44. # Replace some of the parameters in the query to get the best quality
  45. # and http links (no m3u8 manifests)
  46. file_url = re.sub(r'(?<=\?)(.+)$',
  47. lambda m: self._clean_query(m.group()),
  48. file_url)
  49. info = self._download_xml(file_url, video_id,
  50. 'Downloading video info')
  51. item = info.find('channel/item')
  52. def _bp(p):
  53. return xpath_with_ns(
  54. p,
  55. {
  56. 'media': 'http://search.yahoo.com/mrss/',
  57. 'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats',
  58. }
  59. )
  60. formats = []
  61. for content in item.findall(_bp('media:group/media:content')):
  62. attr = content.attrib
  63. f_url = attr['url']
  64. width = int(attr['width'])
  65. bitrate = int(attr['bitrate'])
  66. format_id = '%d-%dk' % (width, bitrate)
  67. formats.append({
  68. 'format_id': format_id,
  69. 'url': f_url,
  70. 'width': width,
  71. 'tbr': bitrate,
  72. })
  73. self._sort_formats(formats)
  74. return {
  75. 'id': video_id,
  76. 'title': item.find('title').text,
  77. 'formats': formats,
  78. 'thumbnail': item.find(_bp('media:thumbnail')).attrib['url'],
  79. 'description': item.find('description').text,
  80. 'duration': int(attr['duration']),
  81. }