From 7bc45c2cdafa3bbc9772f74f547b6329e781bba1 Mon Sep 17 00:00:00 2001
From: sukhois
Date: Fri, 19 Jun 2015 07:52:26 +0200
Subject: [PATCH 1/2] Added a ripper for newsfilter.org galleries

---
Note: sanitizeURL() uses String.replaceAll() because the mobile-host
pattern is a regular expression; String.replace() only matches literal
text and would never rewrite m.newsfilter.org URLs.

 .../ripper/rippers/NewsfilterRipper.java | 96 +++++++++++++++++++
 1 file changed, 96 insertions(+)
 create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java
new file mode 100644
index 00000000..8d88f010
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java
@@ -0,0 +1,96 @@
+package com.rarchives.ripme.ripper.rippers;
+
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.ripper.AlbumRipper;
+import org.jsoup.Connection;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class NewsfilterRipper extends AlbumRipper {
+    private static final String HOST = "newsfilter";
+
+    public NewsfilterRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public boolean canRip(URL url) {
+        //http://newsfilter.org/gallery/he-doubted-she-would-fuck-on-cam-happy-to-be-proven-wrong-216799
+        Pattern p = Pattern.compile("^https?://([wm]+\\.)?newsfilter\\.org/gallery/.+$");
+        Matcher m = p.matcher(url.toExternalForm());
+        return m.matches();
+    }
+
+    @Override
+    public URL sanitizeURL(URL url) throws MalformedURLException {
+        String u = url.toExternalForm();
+        if (u.indexOf('#') >= 0) {
+            u = u.substring(0, u.indexOf('#'));
+        }
+        u = u.replaceAll("https?://m\\.newsfilter\\.org", "http://newsfilter.org");
+        return new URL(u);
+    }
+
+    @Override
+    public void rip() throws IOException {
+        String gid = getGID(this.url),
+                theurl = "http://newsfilter.org/gallery/" + gid;
+
+        Connection.Response resp = null;
+        logger.info("Loading " + theurl);
+        resp = Jsoup.connect(theurl)
+                .timeout(5000)
+                .referrer("")
+                .userAgent(USER_AGENT)
+                .method(Connection.Method.GET)
+                .execute();
+
+        Document doc = resp.parse();
+        Element gallery = doc.getElementById("thegalmain");
+        Elements piclinks = gallery.getElementsByAttributeValue("itemprop","contentURL");
+        for (Element picelem : piclinks) {
+            String picurl = "http://newsfilter.org"+picelem.attr("href");
+            logger.info("Getting to picture page: "+picurl);
+            resp = Jsoup.connect(picurl)
+                    .timeout(5000)
+                    .referrer(theurl)
+                    .userAgent(USER_AGENT)
+                    .method(Connection.Method.GET)
+                    .execute();
+            Document picdoc = resp.parse();
+            String dlurl = picdoc.getElementsByClass("downloadimagebutton")
+                    .first()
+                    .attr("href");
+            addURLToDownload(new URL(dlurl));
+        }
+        waitForThreads();
+    }
+
+    @Override
+    public String getHost() {
+        return HOST;
+    }
+
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        Pattern p = Pattern.compile("^https?://([wm]+\\.)?newsfilter\\.org/gallery/([^/]+)$");
+        Matcher m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return m.group(2);
+        }
+        throw new MalformedURLException("Expected newsfilter gallery format: "
+                        + "http://newsfilter.org/gallery/galleryid"
+                        + " Got: " + url);
+    }
+
+}

From b0a503b8643dd49e1f57210bdb074442b86a45f6 Mon Sep 17 00:00:00 2001
From: Chikitulfo
Date: Fri, 26 Jun 2015 09:51:05 +0200
Subject: [PATCH 2/2] Fixed Newsfilter.org ripper for new webpage changes

---
Note: the gallery id is passed through Pattern.quote() before it is
embedded in the link-matching regex, so any regex metacharacters in the
id are matched literally instead of being interpreted.

 .../ripme/ripper/rippers/NewsfilterRipper.java | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java
index 8d88f010..3eb1b43d 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java
@@ -1,7 +1,6 @@
 package com.rarchives.ripme.ripper.rippers;
 
 
-import com.rarchives.ripme.ripper.AbstractHTMLRipper;
 import com.rarchives.ripme.ripper.AlbumRipper;
 import org.jsoup.Connection;
 import org.jsoup.Jsoup;
@@ -12,7 +11,6 @@ import org.jsoup.select.Elements;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
-import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -56,8 +54,10 @@ public class NewsfilterRipper extends AlbumRipper {
                 .execute();
 
         Document doc = resp.parse();
-        Element gallery = doc.getElementById("thegalmain");
-        Elements piclinks = gallery.getElementsByAttributeValue("itemprop","contentURL");
+        //Element gallery = doc.getElementById("thegalmain");
+        //Elements piclinks = gallery.getElementsByAttributeValue("itemprop","contentURL");
+        Pattern pat = Pattern.compile(Pattern.quote(gid)+"/\\d+");
+        Elements piclinks = doc.getElementsByAttributeValueMatching("href", pat);
         for (Element picelem : piclinks) {
             String picurl = "http://newsfilter.org"+picelem.attr("href");
             logger.info("Getting to picture page: "+picurl);
@@ -68,9 +68,7 @@ public class NewsfilterRipper extends AlbumRipper {
                     .method(Connection.Method.GET)
                     .execute();
             Document picdoc = resp.parse();
-            String dlurl = picdoc.getElementsByClass("downloadimagebutton")
-                    .first()
-                    .attr("href");
+            String dlurl = picdoc.getElementsByAttributeValue("itemprop","contentURL").first().attr("src");
             addURLToDownload(new URL(dlurl));
         }
         waitForThreads();