Browse Source

Add various anime sites (Closes #4554)

totalwebcasting
Philipp Hagemeister 10 years ago
parent
commit
b68ff25917
6 changed files with 355 additions and 0 deletions
  1. +1
    -0
      AUTHORS
  2. +19
    -0
      youtube_dl/extractor/__init__.py
  3. +76
    -0
      youtube_dl/extractor/gogoanime.py
  4. +149
    -0
      youtube_dl/extractor/play44.py
  5. +74
    -0
      youtube_dl/extractor/soulanime.py
  6. +36
    -0
      youtube_dl/extractor/videofun.py

+ 1
- 0
AUTHORS View File

@ -98,3 +98,4 @@ Will Glynn
Max Reimann Max Reimann
Cédric Luthi Cédric Luthi
Thijs Vermeir Thijs Vermeir
Joel Leclerc

+ 19
- 0
youtube_dl/extractor/__init__.py View File

@ -164,6 +164,10 @@ from .globo import GloboIE
from .godtube import GodTubeIE from .godtube import GodTubeIE
from .goldenmoustache import GoldenMoustacheIE from .goldenmoustache import GoldenMoustacheIE
from .golem import GolemIE from .golem import GolemIE
from .gogoanime import (
GoGoAnimeIE,
GoGoAnimeSearchIE
)
from .googleplus import GooglePlusIE from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE from .googlesearch import GoogleSearchIE
from .gorillavid import GorillaVidIE from .gorillavid import GorillaVidIE
@ -313,6 +317,16 @@ from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE from .photobucket import PhotobucketIE
from .planetaplay import PlanetaPlayIE from .planetaplay import PlanetaPlayIE
from .played import PlayedIE from .played import PlayedIE
from .play44 import (
Play44IE,
ByZooIE,
Video44IE,
VideoWingIE,
PlayPandaIE,
VideoZooIE,
PlayBBIE,
EasyVideoIE
)
from .playfm import PlayFMIE from .playfm import PlayFMIE
from .playvid import PlayvidIE from .playvid import PlayvidIE
from .podomatic import PodomaticIE from .podomatic import PodomaticIE
@ -373,6 +387,10 @@ from .smotri import (
from .snotr import SnotrIE from .snotr import SnotrIE
from .sockshare import SockshareIE from .sockshare import SockshareIE
from .sohu import SohuIE from .sohu import SohuIE
from .soulanime import (
SoulAnimeWatchingIE,
SoulAnimeSeriesIE
)
from .soundcloud import ( from .soundcloud import (
SoundcloudIE, SoundcloudIE,
SoundcloudSetIE, SoundcloudSetIE,
@ -467,6 +485,7 @@ from .viddler import ViddlerIE
from .videobam import VideoBamIE from .videobam import VideoBamIE
from .videodetective import VideoDetectiveIE from .videodetective import VideoDetectiveIE
from .videolecturesnet import VideoLecturesNetIE from .videolecturesnet import VideoLecturesNetIE
from .videofun import VideoFunIE
from .videofyme import VideofyMeIE from .videofyme import VideofyMeIE
from .videomega import VideoMegaIE from .videomega import VideoMegaIE
from .videopremium import VideoPremiumIE from .videopremium import VideoPremiumIE


+ 76
- 0
youtube_dl/extractor/gogoanime.py View File

@ -0,0 +1,76 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
compat_urllib_parse,
get_element_by_attribute,
unescapeHTML
)
class GoGoAnimeIE(InfoExtractor):
    """Extractor for gogoanime.com post pages.

    The post page embeds third-party players in iframes.  This extractor
    downloads no media itself: it collects the iframe URLs and hands them
    over as URL results for other extractors to resolve.
    """

    IE_NAME = 'gogoanime'
    IE_DESC = 'GoGoAnime'
    # Dots are escaped so that they only match the literal dots of the host.
    _VALID_URL = r'http://www\.gogoanime\.com/(?P<id>[A-Za-z0-9-]+)'
    _TEST = {
        'url': 'http://www.gogoanime.com/mahou-shoujo-madoka-magica-movie-1',
        'info_dict': {
            'id': 'mahou-shoujo-madoka-magica-movie-1'
        },
        'playlist_count': 3
    }

    def _real_extract(self, url):
        """Return a playlist or a single URL result for the post at *url*.

        Raises ExtractorError when the site reports a missing page, when the
        post body cannot be located, or when the single-video layout yields
        no usable iframe.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        if 'Oops! Page Not Found</font>' in page:
            raise ExtractorError('Video does not exist', expected=True)

        content = get_element_by_attribute("class", "postcontent", page)
        if content is None:
            # Without this guard re.findall() would raise a bare TypeError.
            raise ExtractorError('Unable to find the post content')

        # NOTE(review): videofun iframes are deliberately skipped here; the
        # reason is not visible in this file -- confirm before removing.
        vids = [
            unescapeHTML(compat_urllib_parse.unquote(src))
            for src in re.findall(
                r'<iframe[^>]*?src=[\'"](h[^\'"]+)[\'"]', content)
            if 'videofun' not in src]

        # An iframe directly followed by <br /> at the top of the post body
        # is treated as the multi-part layout: every iframe becomes an entry.
        if re.search(r'<div class="postcontent">[^<]*<p><iframe src=[\'"][^>]+></iframe><br />', page):
            return self.playlist_result(
                [self.url_result(vid) for vid in vids], video_id)

        if not vids:
            # Previously this surfaced as an IndexError on vids[0].
            raise ExtractorError('Unable to find any supported video embed')

        title = self._html_search_regex(
            r'<div class="postdesc">[^<]*<h1>([^<]+)</h1>', page, 'title')

        return {
            '_type': 'url',
            'id': video_id,
            'url': vids[0],
            'title': title,
        }
class GoGoAnimeSearchIE(InfoExtractor):
    """Turn a gogoanime.com search-results page into a playlist of links."""

    IE_NAME = 'gogoanime:search'
    IE_DESC = 'GoGoAnime Search'
    _VALID_URL = r'http://www\.gogoanime\.com/.*\?s=(?P<id>[^&]*)'
    _TEST = {
        'url': 'http://www.gogoanime.com/?s=bokusatsu',
        'info_dict': {
            'id': 'bokusatsu',
        },
        'playlist_count': 6,
    }

    def _real_extract(self, url):
        """Return a playlist with one URL result per post link on the page."""
        query = self._match_id(url)
        results_page = self._download_webpage(url, query)

        # Each hit is the first link inside a "postlist" block.
        entries = []
        for hit in re.finditer(
                r'<div class="postlist">[^<]*<p[^>]*>[^<]*<a href="(?P<url>[^"]+)"',
                results_page):
            entries.append(self.url_result(hit.group('url')))

        return self.playlist_result(entries, query)

+ 149
- 0
youtube_dl/extractor/play44.py View File

@ -0,0 +1,149 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse
)
class Play44IE(InfoExtractor):
    """Extractor for play44.net embed pages.

    Also serves as the base class for the sibling embed hosts in this
    module, which only override ``_VALID_URL`` and ``_TESTS``.
    """

    _VALID_URL = r'http://[w.]*play44\.net/embed\.php[^/]*/(?P<id>.+)'
    _TESTS = [{
        'url': 'http://play44.net/embed.php?w=600&h=438&vid=M/mahou-shoujo-madoka-magica-07.flv',
        'md5': 'e37e99d665f503dd2db952f7c4dba9e6',
        'info_dict': {
            'id': 'mahou-shoujo-madoka-magica-07',
            'ext': 'flv',
            'title': 'mahou-shoujo-madoka-magica-07',
        }
    }]

    def _real_extract(self, url):
        """Return the media URL found in the embed page at *url*.

        The id matched from *url* is only used while downloading the page;
        the returned ``id`` and ``title`` are both the media file's base name.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        video_url = compat_urllib_parse.unquote(self._html_search_regex(
            r'_url = "(https?://[^"]+?)";', page, 'url'))

        # Text after the last "/" up to the first "." (or the end of the URL).
        # The dot is escaped: an unescaped "." matched any character and so
        # swallowed the last character of names that carry no extension.
        title = self._search_regex(
            r'.*/(?P<title>[^.]*)(?:\.|$)', video_url, 'title')

        return {
            'id': title,
            'url': video_url,
            'title': title,
        }
class ByZooIE(Play44IE):
    """byzoo.org embed pages; extraction is inherited from Play44IE."""

    _VALID_URL = r'http://[w.]*byzoo\.org/embed\.php[^/]*/(?P<id>.+)'

    _TESTS = [
        {
            'url': 'http://byzoo.org/embed.php?w=600&h=438&vid=at/nw/mahou_shoujo_madoka_magica_movie_3_-_part1.mp4',
            'md5': '455c83dabe2cd9fd74a87612b01fe017',
            'info_dict': {
                'id': 'mahou_shoujo_madoka_magica_movie_3_-_part1',
                'ext': 'mp4',
                'title': 'mahou_shoujo_madoka_magica_movie_3_-_part1',
            },
        },
    ]
class Video44IE(Play44IE):
    """video44.net player pages; extraction is inherited from Play44IE."""

    # The id is the ``file`` query value without its extension.  The group
    # used to be ``[^&].``, which captured exactly two characters.
    _VALID_URL = r'http://[w.]*video44\.net/.*file=(?P<id>[^&.]+)'
    _TESTS = [{
        'url': 'http://www.video44.net/gogo/?w=600&h=438&file=chaoshead-12.flv&sv=1',
        'md5': '43eaec6d0beb10e8d42459b9f108aff3',
        'info_dict': {
            'id': 'chaoshead-12',
            'ext': 'mp4',
            'title': 'chaoshead-12',
        }
    }]
class VideoWingIE(Play44IE):
    """videowing embed pages; extraction is inherited from Play44IE.

    Two URL shapes are accepted: a ``video=`` query value and an
    ``embed/<token>`` path.
    """

    # Verbose-mode pattern: whitespace inside it is not significant.
    _VALID_URL = r'''(?x)
        http://[w.]*videowing\.[^/]*/
        (?:
            .*video=/*
            |embed/
        )
        (?P<id>[^&?.]+)
    '''

    _TESTS = [
        {
            'url': 'http://videowing.me/embed?w=718&h=438&video=ongoing/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
            'md5': '4ed320e353ed26c742c4f12a9c210b60',
            'info_dict': {
                'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
                'ext': 'mp4',
                'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
            },
        },
        {
            'url': 'http://videowing.me/embed/a8d6a39522df066bd734a69f2334497e?w=600&h=438',
            'md5': '33fdd71581357018c226f95c5cedcfd7',
            'info_dict': {
                'id': 'mahoushoujomadokamagicamovie1part1',
                'ext': 'flv',
                'title': 'mahoushoujomadokamagicamovie1part1',
            },
        },
    ]
class PlayPandaIE(Play44IE):
    """playpanda embed pages; extraction is inherited from Play44IE."""

    # The id is the ``vid`` query value without its extension.  The group
    # used to be ``[^&].``, which captured exactly two characters.
    _VALID_URL = r'http://[w.]*playpanda\.[^/]*/.*vid=/*(?P<id>[^&.]+)'
    _TESTS = [{
        'url': 'http://playpanda.net/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
        'md5': '4ed320e353ed26c742c4f12a9c210b60',
        'info_dict': {
            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
            'ext': 'mp4',
            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
            # No 'description' is expected: the inherited _real_extract
            # returns only id, url and title.
        }
    }]
class VideoZooIE(Play44IE):
    """videozoo embed pages; extraction is inherited from Play44IE."""

    # The id is the ``vid`` query value without its extension.  The group
    # used to be ``[^&].``, which captured exactly two characters.
    _VALID_URL = r'http://[w.]*videozoo\.[^/]*/.*vid=/*(?P<id>[^&.]+)'
    _TESTS = [{
        'url': 'http://videozoo.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
        'md5': '4ed320e353ed26c742c4f12a9c210b60',
        'info_dict': {
            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
            'ext': 'mp4',
            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
        }
    }]
class PlayBBIE(Play44IE):
    """playbb embed pages; extraction is inherited from Play44IE."""

    # The id is the ``vid`` query value without its extension.  The group
    # used to be ``[^&].``, which captured exactly two characters.
    _VALID_URL = r'http://[w.]*playbb\.[^/]*/.*vid=/*(?P<id>[^&.]+)'
    _TESTS = [{
        'url': 'http://playbb.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
        'md5': '4ed320e353ed26c742c4f12a9c210b60',
        'info_dict': {
            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
            'ext': 'mp4',
            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
        }
    }]
class EasyVideoIE(Play44IE):
    """easyvideo player pages; extraction is inherited from Play44IE."""

    _VALID_URL = r'http://[w.]*easyvideo\.[^/]*/.*file=/*(?P<id>[^&.]+)'

    _TESTS = [
        {
            'url': 'http://easyvideo.me/gogo/?w=718&h=438&file=bokuwatomodachigasukunai-04.flv&sv=1',
            'md5': '26178b57629b7650106d72b191137176',
            'info_dict': {
                'id': 'bokuwatomodachigasukunai-04',
                'ext': 'mp4',
                'title': 'bokuwatomodachigasukunai-04',
            },
            'skip': 'Blocked in Germany',
        },
    ]

+ 74
- 0
youtube_dl/extractor/soulanime.py View File

@ -0,0 +1,74 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class SoulAnimeWatchingIE(InfoExtractor):
    """Extractor for a single soul-anime episode ("watching") page."""

    IE_NAME = "soulanime:watching"
    IE_DESC = "SoulAnime video"
    _TEST = {
        'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/',
        'md5': '05fae04abf72298098b528e98abf4298',
        'info_dict': {
            'id': 'seirei-tsukai-no-blade-dance-episode-9',
            'ext': 'mp4',
            'title': 'seirei-tsukai-no-blade-dance-episode-9',
            'description': 'seirei-tsukai-no-blade-dance-episode-9'
        }
    }
    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/watch[^/]*/(?P<id>[^/]+)'

    def _real_extract(self, url):
        """Return the download link of the episode page at *url*.

        The extension is taken from the Content-Type header of the media
        response.  When that header is missing or has no subtype, ``ext`` is
        left out of the result.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        domain = mobj.group('domain')

        page = self._download_webpage(url, video_id)

        # The href is appended to the site root, i.e. it is treated as a
        # root-relative path.
        video_url_encoded = self._html_search_regex(
            r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url')
        video_url = "http://www.soul-anime." + domain + video_url_encoded

        # Only the response headers are needed.  The handle is closed right
        # away so the connection is not left open.
        vid = self._request_webpage(video_url, video_id)
        try:
            # .get() exists on the header objects of both Python 2 and 3,
            # whereas gettype() is only available on Python 2.
            content_type = vid.info().get('Content-Type') or ''
        finally:
            vid.close()

        # Strip parameters such as "; charset=..." before taking the subtype.
        mimetype = content_type.split(';')[0].strip().lower()

        info = {
            'id': video_id,
            'url': video_url,
            'title': video_id,
            'description': video_id
        }
        if '/' in mimetype:
            info['ext'] = mimetype.split('/')[1]
        return info
class SoulAnimeSeriesIE(InfoExtractor):
    """Turn a soul-anime series page into a playlist of its episode pages."""

    IE_NAME = "soulanime:series"
    IE_DESC = "SoulAnime Series"

    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/anime./(?P<id>[^/]+)'

    # Episode links are read from the <option> values on the series page.
    _EPISODE_REGEX = r'<option value="(/watch[^/]*/[^"]+)">[^<]*</option>'

    _TEST = {
        'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/',
        'info_dict': {
            'id': 'black-rock-shooter-tv',
        },
        'playlist_count': 8,
    }

    def _real_extract(self, url):
        """Return a playlist with one URL result per episode option."""
        url_match = re.match(self._VALID_URL, url)
        series_id, domain = url_match.group('id'), url_match.group('domain')

        page = self._download_webpage(url, series_id, "Downloading series page")

        # Episode paths are root-relative; prefix them with the site root of
        # the same top-level domain the series page was requested from.
        site_root = "http://www.soul-anime." + domain
        entries = []
        for episode_path in re.findall(self._EPISODE_REGEX, page):
            entries.append(self.url_result(site_root + episode_path))

        return self.playlist_result(entries, series_id)

+ 36
- 0
youtube_dl/extractor/videofun.py View File

@ -0,0 +1,36 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse
)
class VideoFunIE(InfoExtractor):
    """Extractor for videofun.me embed pages."""

    _VALID_URL = r'http://[w.]*videofun\.me/embed/(?P<id>[0-9a-f]+)'

    _TEST = {
        'url': 'http://videofun.me/embed/8267659be070860af600fee7deadbcdb?w=600&h=438',
        'md5': 'e37e99d665f503dd2db952f7c4dba9e6',
        'info_dict': {
            'id': 'Mahou-Shoujo-Madoka-Magica-07',
            'ext': 'flv',
            'title': 'Mahou-Shoujo-Madoka-Magica-07',
        },
    }

    def _real_extract(self, url):
        """Return the gateway media URL referenced by the embed page.

        The embed token from *url* is only used while downloading the page;
        the returned ``id`` and ``title`` are both the media file's base name.
        """
        embed_id = self._match_id(url)
        embed_page = self._download_webpage(
            url, embed_id, 'Downloading video page')

        # The page stores the media URL percent-encoded.
        media_url = compat_urllib_parse.unquote(self._html_search_regex(
            r'url: "(http://gateway\.videofun\.me[^"]+)"',
            embed_page, 'video url'))

        # Base name: text between the last "/" and the following ".".
        name = self._html_search_regex(r'.*/([^.]*)\.', media_url, 'title')

        return {
            'id': name,
            'url': media_url,
            'title': name,
        }

Loading…
Cancel
Save