Clean up chan ripper and remove dead chans

This commit is contained in:
cyian-1756 2018-10-08 09:45:26 -04:00
parent c5e2e225a3
commit 131f5637c1
2 changed files with 43 additions and 26 deletions

View File

@ -17,10 +17,16 @@ import org.jsoup.nodes.Element;
public class ChanRipper extends AbstractHTMLRipper { public class ChanRipper extends AbstractHTMLRipper {
private static List<ChanSite> explicit_domains = Arrays.asList( private static List<ChanSite> explicit_domains = Arrays.asList(
new ChanSite(Arrays.asList("boards.4chan.org"), Arrays.asList("4cdn.org", "is.4chan.org", "is2.4chan.org", "is3.4chan.org")), new ChanSite("boards.4chan.org", Arrays.asList("4cdn.org", "is.4chan.org", "is2.4chan.org", "is3.4chan.org")),
new ChanSite(Arrays.asList("4archive.org"), Arrays.asList("imgur.com")), new ChanSite("4archive.org", "imgur.com"),
new ChanSite(Arrays.asList("archive.4plebs.org"), Arrays.asList("img.4plebs.org")), new ChanSite("archive.4plebs.org", "img.4plebs.org"),
new ChanSite(Arrays.asList("yuki.la"), Arrays.asList("55chan.org")) new ChanSite("yuki.la", "ii.yuki.la"),
new ChanSite("55chan.org"),
new ChanSite("desuchan.net"),
new ChanSite("boards.420chan.org"),
new ChanSite("7chan.org"),
new ChanSite("desuarchive.org", "desu-usergeneratedcontent.xyz"),
new ChanSite("8ch.net", "media.8ch.net")
); );
private static List<String> url_piece_blacklist = Arrays.asList( private static List<String> url_piece_blacklist = Arrays.asList(
@ -80,32 +86,12 @@ public class ChanRipper extends AbstractHTMLRipper {
@Override @Override
public boolean canRip(URL url) { public boolean canRip(URL url) {
for (ChanSite _chanSite : explicit_domains) { for (ChanSite _chanSite : explicit_domains) {
LOGGER.info(_chanSite.domains);
if (_chanSite.domains.contains(url.getHost())) { if (_chanSite.domains.contains(url.getHost())) {
return true; return true;
} }
} }
if (url.toExternalForm().contains("desuchan.net") && url.toExternalForm().contains("/res/")) {
return true;
}
if (url.toExternalForm().contains("boards.420chan.org") && url.toExternalForm().contains("/res/")) {
return true;
}
if (url.toExternalForm().contains("7chan.org") && url.toExternalForm().contains("/res/")) {
return true;
}
if (url.toExternalForm().contains("xchan.pw") && url.toExternalForm().contains("/board/")) {
return true;
}
if (url.toExternalForm().contains("desuarchive.org")) {
return true;
}
if (url.toExternalForm().contains("8ch.net") && url.toExternalForm().contains("/res/")) {
return true;
}
if (url.toExternalForm().contains("55chan.org") && url.toExternalForm().contains("/res/")) {
return true;
}
return false; return false;
} }
@ -209,7 +195,7 @@ public class ChanRipper extends AbstractHTMLRipper {
} }
if (self_hosted || generalChanSite) { if (self_hosted || generalChanSite) {
p = Pattern.compile("^.*\\.(jpg|jpeg|png|gif|apng|webp|tif|tiff|webm)$", Pattern.CASE_INSENSITIVE); p = Pattern.compile("^.*\\.(jpg|jpeg|png|gif|apng|webp|tif|tiff|webm|mp4)$", Pattern.CASE_INSENSITIVE);
m = p.matcher(href); m = p.matcher(href);
if (m.matches()) { if (m.matches()) {
if (href.startsWith("//")) { if (href.startsWith("//")) {

View File

@ -1,5 +1,6 @@
package com.rarchives.ripme.ripper.rippers.ripperhelpers; package com.rarchives.ripme.ripper.rippers.ripperhelpers;
import java.util.Arrays;
import java.util.List; import java.util.List;
public class ChanSite { public class ChanSite {
@ -19,6 +20,36 @@ public class ChanSite {
cdnDomains = CdnDomains; cdnDomains = CdnDomains;
} }
public ChanSite(String Domain, List<String> CdnDomains) {
if (Domain.isEmpty()) {
throw new IllegalArgumentException("Domains");
}
if (CdnDomains.isEmpty()) {
throw new IllegalArgumentException("CdnDomains");
}
domains = Arrays.asList(Domain);
cdnDomains = CdnDomains;
}
public ChanSite(String Domain, String CdnDomain) {
if (Domain.isEmpty()) {
throw new IllegalArgumentException("Domains");
}
if (CdnDomain.isEmpty()) {
throw new IllegalArgumentException("CdnDomains");
}
domains = Arrays.asList(Domain);
cdnDomains = Arrays.asList(CdnDomain);
}
public ChanSite(String Domain) {
if (Domain.isEmpty()) {
throw new IllegalArgumentException("Domains");
}
domains = Arrays.asList(Domain);
cdnDomains = Arrays.asList(Domain);
}
public ChanSite(List<String> Domains) { public ChanSite(List<String> Domains) {
if (Domains.isEmpty()) { if (Domains.isEmpty()) {
throw new IllegalArgumentException("Domains"); throw new IllegalArgumentException("Domains");