From 313f3b0de0baf836a125ad358df8860350deaf2e Mon Sep 17 00:00:00 2001 From: 4pr0n Date: Fri, 13 Jun 2014 23:14:24 -0700 Subject: [PATCH] 1.0.67 - Tumblr ripper can rip external domains You have to enter the URL in a weird format, though. For example, to rip `fiddle.se`, you need to try to rip: `http://fiddle.se.tumblr.com` Tags and /post/ rips are still supported --- pom.xml | 2 +- .../ripme/ripper/rippers/TumblrRipper.java | 59 ++++++++++++++++--- .../com/rarchives/ripme/ui/MainWindow.java | 4 +- .../com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 4 files changed, 54 insertions(+), 13 deletions(-) diff --git a/pom.xml b/pom.xml index 40916887..89f4820d 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.0.66 + 1.0.67 ripme http://rip.rarchives.com diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/TumblrRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/TumblrRipper.java index 2af56c1e..90873f08 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/TumblrRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/TumblrRipper.java @@ -6,6 +6,7 @@ import java.net.URL; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.commons.lang.StringUtils; import org.json.JSONArray; import org.json.JSONObject; import org.jsoup.Jsoup; @@ -27,11 +28,13 @@ public class TumblrRipper extends AlbumRipper { private ALBUM_TYPE albumType; private String subdomain, tagName, postNumber; - private final String API_KEY; + private static final String API_KEY; + static { + API_KEY = Utils.getConfigString("tumblr.auth", null); + } public TumblrRipper(URL url) throws IOException { super(url); - API_KEY = Utils.getConfigString("tumblr.auth", null); if (API_KEY == null) { throw new IOException("Could not find tumblr authentication key in configuration"); } @@ -44,9 +47,39 @@ public class TumblrRipper extends AlbumRipper { @Override public URL sanitizeURL(URL url) throws MalformedURLException { + 
String u = url.toExternalForm(); + // Convert .tumblr.com/path to /path if needed + if (StringUtils.countMatches(u, ".") > 2) { + url = new URL(u.replace(".tumblr.com", "")); + if (isTumblrURL(url)) { + logger.info("Detected tumblr site: " + url); + } + else { + logger.info("Not a tumblr site: " + url); + } + } return url; } + public boolean isTumblrURL(URL url) { + String checkURL = "http://api.tumblr.com/v2/blog/"; + checkURL += url.getHost(); + checkURL += "/info?api_key=" + API_KEY; + try { + Document doc = Jsoup.connect(checkURL) + .ignoreContentType(true) + .userAgent(USER_AGENT) + .get(); + String jsonString = doc.body().html().replaceAll("&quot;", "\""); + JSONObject json = new JSONObject(jsonString); + int status = json.getJSONObject("meta").getInt("status"); + return status == 200; + } catch (IOException e) { + logger.error("Error while checking possible tumblr domain: " + url.getHost(), e); + } + return false; + } + @Override public void rip() throws IOException { String[] mediaTypes; @@ -59,6 +92,9 @@ public class TumblrRipper extends AlbumRipper { for (String mediaType : mediaTypes) { offset = 0; while (true) { + if (isStopped()) { + break; + } String apiURL = getTumblrApiURL(mediaType, offset); logger.info(" Retrieving " + apiURL); Document doc = Jsoup.connect(apiURL) @@ -79,6 +115,9 @@ public class TumblrRipper extends AlbumRipper { } offset += 20; } + if (isStopped()) { + break; + } } waitForThreads(); } @@ -134,7 +173,7 @@ public class TumblrRipper extends AlbumRipper { if (albumType == ALBUM_TYPE.POST) { sb.append("http://api.tumblr.com/v2/blog/") .append(subdomain) - .append(".tumblr.com/posts?id=") + .append("/posts?id=") .append(postNumber) .append("&api_key=") .append(API_KEY); @@ -142,7 +181,7 @@ public class TumblrRipper extends AlbumRipper { } sb.append("http://api.tumblr.com/v2/blog/") .append(subdomain) - .append(".tumblr.com/posts/") + .append("/posts/") .append(mediaType) .append("?api_key=") .append(API_KEY) @@ -162,10 +201,13 @@ public 
class TumblrRipper extends AlbumRipper { @Override public String getGID(URL url) throws MalformedURLException { + final String DOMAIN_REGEX = "^https?://([a-zA-Z0-9\\-\\.]+)"; + Pattern p; Matcher m; + // Tagged URL - p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.tumblr\\.com/tagged/([a-zA-Z0-9\\-%]{1,}).*$"); + p = Pattern.compile(DOMAIN_REGEX + "/tagged/([a-zA-Z0-9\\-%]+).*$"); m = p.matcher(url.toExternalForm()); if (m.matches()) { this.albumType = ALBUM_TYPE.TAG; @@ -175,7 +217,7 @@ public class TumblrRipper extends AlbumRipper { return this.subdomain + "_tag_" + this.tagName.replace("%20", " "); } // Post URL - p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.tumblr\\.com/post/([0-9]{1,}).*$"); + p = Pattern.compile(DOMAIN_REGEX + "/post/([0-9]+).*$"); m = p.matcher(url.toExternalForm()); if (m.matches()) { this.albumType = ALBUM_TYPE.POST; @@ -184,15 +226,14 @@ public class TumblrRipper extends AlbumRipper { return this.subdomain + "_post_" + this.postNumber; } // Subdomain-level URL - p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.tumblr\\.com/?.*$"); + p = Pattern.compile(DOMAIN_REGEX + ".*$"); m = p.matcher(url.toExternalForm()); if (m.matches()) { this.albumType = ALBUM_TYPE.SUBDOMAIN; this.subdomain = m.group(1); return this.subdomain; } - // TODO support non-tumblr.com domains - throw new MalformedURLException("Expected format: http://user.tumblr.com[/tagged/tag|/post/postno]"); + throw new MalformedURLException("Expected format: http://subdomain[.tumblr.com][/tagged/tag|/post/postno]"); } } diff --git a/src/main/java/com/rarchives/ripme/ui/MainWindow.java b/src/main/java/com/rarchives/ripme/ui/MainWindow.java index af07c25b..9ceeebf9 100644 --- a/src/main/java/com/rarchives/ripme/ui/MainWindow.java +++ b/src/main/java/com/rarchives/ripme/ui/MainWindow.java @@ -748,8 +748,8 @@ public class MainWindow implements Runnable, RipStatusHandler { ripper.setup(); } catch (Exception e) { failed = true; - logger.error("Could not find ripper 
for URL " + url); - error("Could not find ripper for given URL"); + logger.error("Could not find ripper for URL " + url, e); + error("Error: " + e.getMessage()); } if (!failed) { try { diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index e1827dcd..493afd7c 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.0.66"; + private static final String DEFAULT_VERSION = "1.0.67"; private static final String updateJsonURL = "http://rarchives.com/ripme.json"; private static final String updateJarURL = "http://rarchives.com/ripme.jar"; private static final String mainFileName = "ripme.jar";