From 7bc45c2cdafa3bbc9772f74f547b6329e781bba1 Mon Sep 17 00:00:00 2001 From: sukhois Date: Fri, 19 Jun 2015 07:52:26 +0200 Subject: [PATCH] Added a ripper for newsfilter.org galleries --- .../ripper/rippers/NewsfilterRipper.java | 96 +++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java new file mode 100644 index 00000000..8d88f010 --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java @@ -0,0 +1,96 @@ +package com.rarchives.ripme.ripper.rippers; + + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.ripper.AlbumRipper; +import org.jsoup.Connection; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class NewsfilterRipper extends AlbumRipper { + private static final String HOST = "newsfilter"; + + public NewsfilterRipper(URL url) throws IOException { + super(url); + } + + @Override + public boolean canRip(URL url) { + //http://newsfilter.org/gallery/he-doubted-she-would-fuck-on-cam-happy-to-be-proven-wrong-216799 + Pattern p = Pattern.compile("^https?://([wm]+\\.)?newsfilter\\.org/gallery/.+$"); + Matcher m = p.matcher(url.toExternalForm()); + return m.matches(); + } + + @Override + public URL sanitizeURL(URL url) throws MalformedURLException { + String u = url.toExternalForm(); + if (u.indexOf('#') >= 0) { + u = u.substring(0, u.indexOf('#')); + } + u = u.replace("https?://m\\.newsfilter\\.org", "http://newsfilter.org"); + return new URL(u); + } + + @Override + public void rip() throws IOException { + String gid = getGID(this.url), + theurl = "http://newsfilter.org/gallery/" + gid; + + Connection.Response resp = null; + logger.info("Loading " + theurl); + resp = Jsoup.connect(theurl) + .timeout(5000) + .referrer("") + .userAgent(USER_AGENT) + .method(Connection.Method.GET) + .execute(); + + Document doc = resp.parse(); + Element gallery = doc.getElementById("thegalmain"); + Elements piclinks = gallery.getElementsByAttributeValue("itemprop","contentURL"); + for (Element picelem : piclinks) { + String picurl = "http://newsfilter.org"+picelem.attr("href"); + logger.info("Getting to picture page: "+picurl); + resp = Jsoup.connect(picurl) + .timeout(5000) + .referrer(theurl) + .userAgent(USER_AGENT) + .method(Connection.Method.GET) + .execute(); + Document picdoc = resp.parse(); + String dlurl = picdoc.getElementsByClass("downloadimagebutton") + .first() + .attr("href"); + addURLToDownload(new URL(dlurl)); + } + waitForThreads(); + } + + @Override + public String getHost() { + return HOST; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("^https?://([wm]+\\.)?newsfilter\\.org/gallery/([^/]+)$"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(2); + } + throw new MalformedURLException("Expected newsfilter gallery format: " + + "http://newsfilter.org/gallery/galleryid" + + " Got: " + url); + } + +}