Cleaned up chan ripper and removed dead chans

This commit is contained in:
cyian-1756 2018-10-08 09:45:26 -04:00
parent c5e2e225a3
commit 131f5637c1
2 changed files with 43 additions and 26 deletions

View File

@ -17,10 +17,16 @@ import org.jsoup.nodes.Element;
public class ChanRipper extends AbstractHTMLRipper {
// Chan sites that canRip recognises by host name. Each ChanSite entry gives the
// site's domain(s) first, followed by its CDN domain(s); the single-argument form
// supplies only the site domain.
// NOTE(review): this listing looks like a diff rendered without +/- markers. The
// Arrays.asList(...)-wrapped entries and the plain-String entries for 4chan, 4archive,
// 4plebs and yuki.la appear to be the old and new versions of the same lines (and there
// is no comma between them) — confirm against the real file before relying on this list.
private static List<ChanSite> explicit_domains = Arrays.asList(
new ChanSite(Arrays.asList("boards.4chan.org"), Arrays.asList("4cdn.org", "is.4chan.org", "is2.4chan.org", "is3.4chan.org")),
new ChanSite(Arrays.asList("4archive.org"), Arrays.asList("imgur.com")),
new ChanSite(Arrays.asList("archive.4plebs.org"), Arrays.asList("img.4plebs.org")),
new ChanSite(Arrays.asList("yuki.la"), Arrays.asList("55chan.org"))
new ChanSite("boards.4chan.org", Arrays.asList("4cdn.org", "is.4chan.org", "is2.4chan.org", "is3.4chan.org")),
new ChanSite("4archive.org", "imgur.com"),
new ChanSite("archive.4plebs.org", "img.4plebs.org"),
new ChanSite("yuki.la", "ii.yuki.la"),
new ChanSite("55chan.org"),
new ChanSite("desuchan.net"),
new ChanSite("boards.420chan.org"),
new ChanSite("7chan.org"),
new ChanSite("desuarchive.org", "desu-usergeneratedcontent.xyz"),
new ChanSite("8ch.net", "media.8ch.net")
);
private static List<String> url_piece_blacklist = Arrays.asList(
@ -80,32 +86,12 @@ public class ChanRipper extends AbstractHTMLRipper {
/**
 * Decides whether this ripper can handle the given URL.
 *
 * Returns true when the URL's host equals one of the domains of any entry in
 * explicit_domains, or when the URL's external form passes one of the substring
 * checks that follow the loop; returns false otherwise.
 *
 * NOTE(review): this listing looks like a diff rendered without +/- markers, so the
 * substring checks after the loop may be lines that the commit removed — confirm
 * against the real file.
 *
 * @param url the URL to test
 * @return true if the URL matches a known chan site as described above, false otherwise
 */
@Override
public boolean canRip(URL url) {
for (ChanSite _chanSite : explicit_domains) {
// Logs the domain list of every entry examined, at info level, on each call.
// NOTE(review): looks like leftover debug output — confirm it is intended.
LOGGER.info(_chanSite.domains);
// List.contains compares whole strings, so only an exact host match succeeds here.
if (_chanSite.domains.contains(url.getHost())) {
return true;
}
}
// The checks below test substrings of the full URL text rather than the host alone;
// most also require a path marker ("/res/" or "/board/") somewhere in the URL.
if (url.toExternalForm().contains("desuchan.net") && url.toExternalForm().contains("/res/")) {
return true;
}
if (url.toExternalForm().contains("boards.420chan.org") && url.toExternalForm().contains("/res/")) {
return true;
}
if (url.toExternalForm().contains("7chan.org") && url.toExternalForm().contains("/res/")) {
return true;
}
if (url.toExternalForm().contains("xchan.pw") && url.toExternalForm().contains("/board/")) {
return true;
}
// desuarchive.org is accepted without any path marker.
if (url.toExternalForm().contains("desuarchive.org")) {
return true;
}
if (url.toExternalForm().contains("8ch.net") && url.toExternalForm().contains("/res/")) {
return true;
}
if (url.toExternalForm().contains("55chan.org") && url.toExternalForm().contains("/res/")) {
return true;
}
return false;
}
@ -209,7 +195,7 @@ public class ChanRipper extends AbstractHTMLRipper {
}
if (self_hosted || generalChanSite) {
p = Pattern.compile("^.*\\.(jpg|jpeg|png|gif|apng|webp|tif|tiff|webm)$", Pattern.CASE_INSENSITIVE);
p = Pattern.compile("^.*\\.(jpg|jpeg|png|gif|apng|webp|tif|tiff|webm|mp4)$", Pattern.CASE_INSENSITIVE);
m = p.matcher(href);
if (m.matches()) {
if (href.startsWith("//")) {

View File

@ -1,5 +1,6 @@
package com.rarchives.ripme.ripper.rippers.ripperhelpers;
import java.util.Arrays;
import java.util.List;
public class ChanSite {
@ -19,6 +20,36 @@ public class ChanSite {
cdnDomains = CdnDomains;
}
/**
 * Creates a chan site from a single site domain and a list of CDN domains.
 *
 * The given CDN list is stored as-is (not copied), so later changes the caller
 * makes to it remain visible through this object.
 *
 * @param Domain     the site's domain; must be non-null and non-empty
 * @param CdnDomains the CDN domains associated with the site; must be non-null and non-empty
 * @throws IllegalArgumentException if either argument is null or empty
 */
public ChanSite(String Domain, List<String> CdnDomains) {
    // Reject null explicitly so callers get the same IllegalArgumentException as for
    // an empty value, rather than a NullPointerException out of isEmpty().
    if (Domain == null || Domain.isEmpty()) {
        throw new IllegalArgumentException("Domains");
    }
    if (CdnDomains == null || CdnDomains.isEmpty()) {
        throw new IllegalArgumentException("CdnDomains");
    }
    domains = Arrays.asList(Domain);
    cdnDomains = CdnDomains;
}
/**
 * Creates a chan site from a single site domain and a single CDN domain.
 *
 * Each value is wrapped in its own one-element list.
 *
 * @param Domain    the site's domain; must be non-null and non-empty
 * @param CdnDomain the CDN domain associated with the site; must be non-null and non-empty
 * @throws IllegalArgumentException if either argument is null or empty
 */
public ChanSite(String Domain, String CdnDomain) {
    // Reject null explicitly so callers get the same IllegalArgumentException as for
    // an empty value, rather than a NullPointerException out of isEmpty().
    if (Domain == null || Domain.isEmpty()) {
        throw new IllegalArgumentException("Domains");
    }
    if (CdnDomain == null || CdnDomain.isEmpty()) {
        throw new IllegalArgumentException("CdnDomains");
    }
    domains = Arrays.asList(Domain);
    cdnDomains = Arrays.asList(CdnDomain);
}
/**
 * Creates a chan site that uses one domain as both its site domain and its CDN domain.
 *
 * The domain is wrapped in two separate one-element lists, one per field.
 *
 * @param Domain the site's domain; must be non-null and non-empty
 * @throws IllegalArgumentException if {@code Domain} is null or empty
 */
public ChanSite(String Domain) {
    // Reject null explicitly so callers get the same IllegalArgumentException as for
    // an empty value, rather than a NullPointerException out of isEmpty().
    if (Domain == null || Domain.isEmpty()) {
        throw new IllegalArgumentException("Domains");
    }
    domains = Arrays.asList(Domain);
    cdnDomains = Arrays.asList(Domain);
}
public ChanSite(List<String> Domains) {
if (Domains.isEmpty()) {
throw new IllegalArgumentException("Domains");