From 7beb6a402d1ad224579cd3aee41c2ccede238535 Mon Sep 17 00:00:00 2001 From: 4pr0n Date: Sun, 25 May 2014 20:10:21 -0700 Subject: [PATCH] Motherless ripper can rip search terms / tags --- .../ripper/rippers/MotherlessRipper.java | 33 ++++++++++++++----- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java index df6a72fe..b40765fd 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java @@ -39,21 +39,22 @@ public class MotherlessRipper extends AlbumRipper { @Override public URL sanitizeURL(URL url) throws MalformedURLException { - String gid = getGID(url); - URL newURL = new URL("http://motherless.com/G" + gid); - logger.debug("Sanitized URL from " + url + " to " + newURL); - return newURL; + return url; } @Override public String getGID(URL url) throws MalformedURLException { Pattern p = Pattern.compile("^https?://(www\\.)?motherless\\.com/G([MVI]?[A-F0-9]{6,8}).*$"); Matcher m = p.matcher(url.toExternalForm()); - System.err.println(url.toExternalForm()); - if (!m.matches()) { - throw new MalformedURLException("Expected URL format: http://motherless.com/GIXXXXXXX, got: " + url); + if (m.matches()) { + return m.group(m.groupCount()); } - return m.group(m.groupCount()); + p = Pattern.compile("^https?://(www\\.)?motherless\\.com/term/(images/|videos/)([a-zA-Z0-9%]+)$"); + m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(m.groupCount()); + } + throw new MalformedURLException("Expected URL format: http://motherless.com/GIXXXXXXX, got: " + url); } @Override @@ -64,9 +65,21 @@ public class MotherlessRipper extends AlbumRipper { logger.info(" Retrieving " + nextURL); Document doc = Jsoup.connect(nextURL) .userAgent(USER_AGENT) + .timeout(5000) + .referrer("http://motherless.com") .get(); for (Element thumb : doc.select("div.thumb a.img-container")) { - URL url = new URL("http://" + DOMAIN + thumb.attr("href")); + String thumbURL = thumb.attr("href"); + if (thumbURL.contains("pornmd.com")) { + continue; + } + URL url; + if (!thumbURL.startsWith("http")) { + url = new URL("http://" + DOMAIN + thumbURL); + } + else { + url = new URL(thumbURL); + } index += 1; // Create thread for finding image at "url" page MotherlessImageThread mit = new MotherlessImageThread(url, index); @@ -101,6 +114,8 @@ public class MotherlessRipper extends AlbumRipper { try { Document doc = Jsoup.connect(this.url.toExternalForm()) .userAgent(USER_AGENT) + .timeout(5000) + .referrer(this.url.toExternalForm()) .get(); Pattern p = Pattern.compile("^.*__fileurl = '([^']{1,})';.*$", Pattern.DOTALL); Matcher m = p.matcher(doc.outerHtml());