From 131f5637c10192dc821e09fe832071e53d76f42e Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Mon, 8 Oct 2018 09:45:26 -0400 Subject: [PATCH] Clean up chan ripper and removed dead chans --- .../ripme/ripper/rippers/ChanRipper.java | 38 ++++++------------- .../rippers/ripperhelpers/ChanSite.java | 31 +++++++++++++++ 2 files changed, 43 insertions(+), 26 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java index c580690a..223e3705 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java @@ -17,10 +17,16 @@ import org.jsoup.nodes.Element; public class ChanRipper extends AbstractHTMLRipper { private static List explicit_domains = Arrays.asList( - new ChanSite(Arrays.asList("boards.4chan.org"), Arrays.asList("4cdn.org", "is.4chan.org", "is2.4chan.org", "is3.4chan.org")), - new ChanSite(Arrays.asList("4archive.org"), Arrays.asList("imgur.com")), - new ChanSite(Arrays.asList("archive.4plebs.org"), Arrays.asList("img.4plebs.org")), - new ChanSite(Arrays.asList("yuki.la"), Arrays.asList("55chan.org")) + new ChanSite("boards.4chan.org", Arrays.asList("4cdn.org", "is.4chan.org", "is2.4chan.org", "is3.4chan.org")), + new ChanSite("4archive.org", "imgur.com"), + new ChanSite("archive.4plebs.org", "img.4plebs.org"), + new ChanSite("yuki.la", "ii.yuki.la"), + new ChanSite("55chan.org"), + new ChanSite("desuchan.net"), + new ChanSite("boards.420chan.org"), + new ChanSite("7chan.org"), + new ChanSite("desuarchive.org", "desu-usergeneratedcontent.xyz"), + new ChanSite("8ch.net", "media.8ch.net") ); private static List url_piece_blacklist = Arrays.asList( @@ -80,32 +86,12 @@ public class ChanRipper extends AbstractHTMLRipper { @Override public boolean canRip(URL url) { for (ChanSite _chanSite : explicit_domains) { + LOGGER.info(_chanSite.domains); if (_chanSite.domains.contains(url.getHost())) { return true; } } - if (url.toExternalForm().contains("desuchan.net") && url.toExternalForm().contains("/res/")) { - return true; - } - if (url.toExternalForm().contains("boards.420chan.org") && url.toExternalForm().contains("/res/")) { - return true; - } - if (url.toExternalForm().contains("7chan.org") && url.toExternalForm().contains("/res/")) { - return true; - } - if (url.toExternalForm().contains("xchan.pw") && url.toExternalForm().contains("/board/")) { - return true; - } - if (url.toExternalForm().contains("desuarchive.org")) { - return true; - } - if (url.toExternalForm().contains("8ch.net") && url.toExternalForm().contains("/res/")) { - return true; - } - if (url.toExternalForm().contains("55chan.org") && url.toExternalForm().contains("/res/")) { - return true; - } return false; } @@ -209,7 +195,7 @@ public class ChanRipper extends AbstractHTMLRipper { } if (self_hosted || generalChanSite) { - p = Pattern.compile("^.*\\.(jpg|jpeg|png|gif|apng|webp|tif|tiff|webm)$", Pattern.CASE_INSENSITIVE); + p = Pattern.compile("^.*\\.(jpg|jpeg|png|gif|apng|webp|tif|tiff|webm|mp4)$", Pattern.CASE_INSENSITIVE); m = p.matcher(href); if (m.matches()) { if (href.startsWith("//")) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ripperhelpers/ChanSite.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ripperhelpers/ChanSite.java index 4b4dd87f..a427123f 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ripperhelpers/ChanSite.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ripperhelpers/ChanSite.java @@ -1,5 +1,6 @@ package com.rarchives.ripme.ripper.rippers.ripperhelpers; +import java.util.Arrays; import java.util.List; public class ChanSite { @@ -19,6 +20,36 @@ public class ChanSite { cdnDomains = CdnDomains; } + public ChanSite(String Domain, List CdnDomains) { + if (Domain.isEmpty()) { + throw new IllegalArgumentException("Domains"); + } + if (CdnDomains.isEmpty()) { + throw new IllegalArgumentException("CdnDomains"); + } + domains = Arrays.asList(Domain); + cdnDomains = CdnDomains; + } + + public ChanSite(String Domain, String CdnDomain) { + if (Domain.isEmpty()) { + throw new IllegalArgumentException("Domains"); + } + if (CdnDomain.isEmpty()) { + throw new IllegalArgumentException("CdnDomains"); + } + domains = Arrays.asList(Domain); + cdnDomains = Arrays.asList(CdnDomain); + } + + public ChanSite(String Domain) { + if (Domain.isEmpty()) { + throw new IllegalArgumentException("Domains"); + } + domains = Arrays.asList(Domain); + cdnDomains = Arrays.asList(Domain); + } + public ChanSite(List Domains) { if (Domains.isEmpty()) { throw new IllegalArgumentException("Domains");