From 28ce42a54f2ad36de99fe825f0e8b4f0203d9076 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 11 Mar 2018 23:03:24 -0400 Subject: [PATCH] Added sinfest ripper --- .../ripme/ripper/rippers/SinfestRipper.java | 80 +++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/SinfestRipper.java diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/SinfestRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/SinfestRipper.java new file mode 100644 index 00000000..d30e9b63 --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/SinfestRipper.java @@ -0,0 +1,80 @@ +package com.rarchives.ripme.ripper.rippers; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; + +public class SinfestRipper extends AbstractHTMLRipper { + + public SinfestRipper(URL url) throws IOException { + super(url); + } + + @Override + public String getHost() { + return "sinfest"; + } + + @Override + public String getDomain() { + return "sinfest.net"; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("https?://sinfest.net/view.php\\?date=([0-9-]*)/?"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1); + } + throw new MalformedURLException("Expected sinfest URL format: " + + "sinfest.net/view.php?date=XXXX-XX-XX/ - got " + url + " instead"); + } + + @Override + public Document getFirstPage() throws IOException { + // "url" is an instance field of the superclass + return Http.url(url).get(); + } + + @Override + public Document getNextPage(Document doc) throws IOException { + Element elem = doc.select("td.style5 > a > img").last(); + logger.info(elem.parent().attr("href")); + if (elem == null || elem.parent().attr("href").equals("view.php?date=")) { + throw new IOException("No more pages"); + } + String nextPage = elem.parent().attr("href"); + // Some times this returns a empty string + // This for stops that + if (nextPage == "") { + return null; + } + else { + return Http.url("http://sinfest.net/" + nextPage).get(); + } + } + + @Override + public List getURLsFromPage(Document doc) { + List result = new ArrayList<>(); + Element elem = doc.select("tbody > tr > td > img").last(); + result.add("http://sinfest.net/" + elem.attr("src")); + return result; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } +}