You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

76 lines
2.3 KiB

  1. import json
  2. import re
  3. import xml.etree.ElementTree
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. ExtractorError,
  7. )
  8. class TriluliluIE(InfoExtractor):
  9. _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?trilulilu\.ro/video-(?P<category>[^/]+)/(?P<video_id>[^/]+)'
  10. _TEST = {
  11. u"url": u"http://www.trilulilu.ro/video-animatie/big-buck-bunny-1",
  12. u'file': u"big-buck-bunny-1.mp4",
  13. u'info_dict': {
  14. u"title": u"Big Buck Bunny",
  15. u"description": u":) pentru copilul din noi",
  16. },
  17. # Server ignores Range headers (--test)
  18. u"params": {
  19. u"skip_download": True
  20. }
  21. }
  22. def _real_extract(self, url):
  23. mobj = re.match(self._VALID_URL, url)
  24. video_id = mobj.group('video_id')
  25. webpage = self._download_webpage(url, video_id)
  26. title = self._og_search_title(webpage)
  27. thumbnail = self._og_search_thumbnail(webpage)
  28. description = self._og_search_description(webpage)
  29. log_str = self._search_regex(
  30. r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, u'log info')
  31. log = json.loads(log_str)
  32. format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
  33. u'video-formats2' % log)
  34. format_str = self._download_webpage(
  35. format_url, video_id,
  36. note=u'Downloading formats',
  37. errnote=u'Error while downloading formats')
  38. format_doc = xml.etree.ElementTree.fromstring(format_str)
  39. video_url_template = (
  40. u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
  41. u'&source=site&hash=%(hash)s&username=%(userid)s&'
  42. u'key=ministhebest&format=%%s&sig=&exp=' %
  43. log)
  44. formats = [
  45. {
  46. 'format': fnode.text,
  47. 'url': video_url_template % fnode.text,
  48. }
  49. for fnode in format_doc.findall('./formats/format')
  50. ]
  51. info = {
  52. '_type': 'video',
  53. 'id': video_id,
  54. 'formats': formats,
  55. 'title': title,
  56. 'description': description,
  57. 'thumbnail': thumbnail,
  58. }
  59. # TODO: Remove when #980 has been merged
  60. info['url'] = formats[-1]['url']
  61. info['ext'] = formats[-1]['format'].partition('-')[0]
  62. return info