From 2c2159aa326ad7a155b3095a459e530e0e575b58 Mon Sep 17 00:00:00 2001
From: cyian-1756
Date: Fri, 1 Dec 2017 14:15:56 -0500
Subject: [PATCH] Oglaf ripper (#309)

* Added OglafRipper

* Better folder naming for Oglaf ripper

* Added oglaf unit test
---
 .../ripme/ripper/rippers/OglafRipper.java     | 115 +++++++++++++++++++
 .../tst/ripper/rippers/OglafRipperTest.java   |  13 +++
 2 files changed, 128 insertions(+)
 create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/OglafRipper.java
 create mode 100644 src/test/java/com/rarchives/ripme/tst/ripper/rippers/OglafRipperTest.java

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/OglafRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/OglafRipper.java
new file mode 100644
index 00000000..a5183397
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/OglafRipper.java
@@ -0,0 +1,115 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+
+/**
+ * Ripper for oglaf.com: starting from one page, follows the "next" navigation
+ * link and collects the strip image found on each page.
+ */
+public class OglafRipper extends AbstractHTMLRipper {
+
+    /** Accepted URL shape; group 1 is the NAME path segment. */
+    private static final Pattern GID_PATTERN =
+            Pattern.compile("https?://(?:www\\.)?oglaf\\.com/([a-zA-Z0-9_-]*)/?");
+
+    /** Marker element of the "next" link; its parent anchor carries the href. */
+    private static final String NEXT_SELECTOR = "div#nav > a > div#nx";
+
+    public OglafRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return "oglaf";
+    }
+
+    @Override
+    public String getDomain() {
+        return "oglaf.com";
+    }
+
+    /**
+     * Extracts the NAME segment from an oglaf.com/NAME URL.
+     *
+     * @param url the URL to inspect
+     * @return the NAME segment; empty when the URL has no path segment
+     * @throws MalformedURLException if the URL is not of the expected form
+     */
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        Matcher m = GID_PATTERN.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return m.group(1);
+        }
+        throw new MalformedURLException("Expected oglaf URL format: " +
+                "oglaf.com/NAME - got " + url + " instead");
+    }
+
+    /** Returns the site domain as the album title, whatever the URL. */
+    @Override
+    public String getAlbumTitle(URL url) throws MalformedURLException {
+        return getDomain();
+    }
+
+    @Override
+    public Document getFirstPage() throws IOException {
+        // "url" is an instance field of the superclass
+        return Http.url(url).get();
+    }
+
+    /**
+     * Fetches the page that the current page's "next" link points to.
+     *
+     * @param doc the page currently being processed
+     * @return the next page
+     * @throws IOException if there is no usable "next" link or the fetch fails
+     */
+    @Override
+    public Document getNextPage(Document doc) throws IOException {
+        Element nextMarker = doc.select(NEXT_SELECTOR).first();
+        if (nextMarker == null) {
+            throw new IOException("No more pages");
+        }
+        String nextPage = nextMarker.parent().attr("href");
+        // The href is sometimes an empty string; treat that as the last page.
+        if (nextPage.isEmpty()) {
+            throw new IOException("No more pages");
+        }
+        // Pause before requesting the next page.
+        sleep(1000);
+        return Http.url("http://oglaf.com" + nextPage).get();
+    }
+
+    /**
+     * Collects the {@code src} of every strip image on the page.
+     *
+     * @param doc the page to scan
+     * @return the image URLs; empty if none are found
+     */
+    @Override
+    public List<String> getURLsFromPage(Document doc) {
+        List<String> result = new ArrayList<>();
+        for (Element el : doc.select("b > img#strip")) {
+            result.add(el.attr("src"));
+        }
+        return result;
+    }
+
+    @Override
+    public void downloadURL(URL url, int index) {
+        addURLToDownload(url, getPrefix(index));
+    }
+}
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/OglafRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/OglafRipperTest.java
new file mode 100644
index 00000000..5b580f7f
--- /dev/null
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/OglafRipperTest.java
@@ -0,0 +1,13 @@
+package com.rarchives.ripme.tst.ripper.rippers;
+
+import java.io.IOException;
+import java.net.URL;
+
+import com.rarchives.ripme.ripper.rippers.OglafRipper;
+
+public class OglafRipperTest extends RippersTest {
+    public void testRip() throws IOException {
+        OglafRipper ripper = new OglafRipper(new URL("http://oglaf.com/plumes/"));
+        testRipper(ripper);
+    }
+}
\ No newline at end of file