From 7cf3a013791554bdc32ec84a3af3023662c7831e Mon Sep 17 00:00:00 2001
From: 0x1f595 <0x1f595@users.noreply.github.com>
Date: Sat, 14 Jul 2018 13:06:29 -0600
Subject: [PATCH 1/2] Adding Jab Archives ripper

This currently only supports ripping bottom-level folders. If a folder
you rip contains another folder, that subfolder will not be fully
ripped; only the source image from its thumbnail will be included in
the rip.

---
 .../ripper/rippers/JabArchivesRipper.java     | 78 +++++++++++++++++++
 1 file changed, 78 insertions(+)
 create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/JabArchivesRipper.java

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/JabArchivesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/JabArchivesRipper.java
new file mode 100644
index 00000000..eeba986a
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/JabArchivesRipper.java
@@ -0,0 +1,78 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+public class JabArchivesRipper extends AbstractHTMLRipper {
+
+    public JabArchivesRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return "jabarchives";
+    }
+
+    @Override
+    public String getDomain() {
+        return "jabarchives.com";
+    }
+
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        Pattern p = Pattern.compile("^https?://jabarchives\\.com/main/view/([a-zA-Z0-9_]+).*$");
+        Matcher m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            // Return the text contained between () in the regex
+            return m.group(1);
+        }
+        throw new MalformedURLException(
+                "Expected jabarchives.com URL format: "
+                        + "jabarchives.com/main/view/albumname - got " + url + " instead");
+    }
+
+    @Override
+    public Document getFirstPage() throws IOException {
+        // "url" is an instance field of the superclass
+        return Http.url(url).get();
+    }
+
+    @Override
+    public Document getNextPage(Document doc) throws IOException {
+        // Find next page
+        Elements hrefs = doc.select("a[title=\"Next page\"]");
+        if (hrefs.isEmpty()) {
+            throw new IOException("No more pages");
+        }
+        String nextUrl = "http://jabarchives.com" + hrefs.first().attr("href");
+        sleep(500);
+        return Http.url(nextUrl).get();
+    }
+
+    @Override
+    public List<String> getURLsFromPage(Document doc) {
+        List<String> result = new ArrayList<String>();
+        for (Element el : doc.select("#contentMain img")) {
+            result.add("http://jabarchives.com" + el.attr("src").replace("thumb", "large"));
+        }
+        return result;
+    }
+
+    @Override
+    public void downloadURL(URL url, int index) {
+        addURLToDownload(url, getPrefix(index));
+    }
+}

From daae0747137726cb68a8ff948fb6f30cb1326198 Mon Sep 17 00:00:00 2001
From: 0x1f595 <0x1f595@users.noreply.github.com>
Date: Sat, 14 Jul 2018 13:10:53 -0600
Subject: [PATCH 2/2] Use SSL by default on Jab Archives

---
 .../com/rarchives/ripme/ripper/rippers/JabArchivesRipper.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/JabArchivesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/JabArchivesRipper.java
index eeba986a..1ad0b2f7 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/JabArchivesRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/JabArchivesRipper.java
@@ -57,7 +57,7 @@ public class JabArchivesRipper extends AbstractHTMLRipper {
         if (hrefs.isEmpty()) {
             throw new IOException("No more pages");
         }
-        String nextUrl = "http://jabarchives.com" + hrefs.first().attr("href");
+        String nextUrl = "https://jabarchives.com" + hrefs.first().attr("href");
         sleep(500);
         return Http.url(nextUrl).get();
     }
@@ -66,7 +66,7 @@ public class JabArchivesRipper extends AbstractHTMLRipper {
     public List<String> getURLsFromPage(Document doc) {
         List<String> result = new ArrayList<String>();
         for (Element el : doc.select("#contentMain img")) {
-            result.add("http://jabarchives.com" + el.attr("src").replace("thumb", "large"));
+            result.add("https://jabarchives.com" + el.attr("src").replace("thumb", "large"));
         }
         return result;
     }