1.0.67 - Tumblr ripper can rip external domains
You have to enter the URL in a weird format, though. For example, to rip `fiddle.se`, you need to rip `http://fiddle.se.tumblr.com`. Tags and /post/ rips are still supported.
This commit is contained in:
parent
3d06bb51e2
commit
313f3b0de0
2
pom.xml
2
pom.xml
@ -4,7 +4,7 @@
|
|||||||
<groupId>com.rarchives.ripme</groupId>
|
<groupId>com.rarchives.ripme</groupId>
|
||||||
<artifactId>ripme</artifactId>
|
<artifactId>ripme</artifactId>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
<version>1.0.66</version>
|
<version>1.0.67</version>
|
||||||
<name>ripme</name>
|
<name>ripme</name>
|
||||||
<url>http://rip.rarchives.com</url>
|
<url>http://rip.rarchives.com</url>
|
||||||
<properties>
|
<properties>
|
||||||
|
@ -6,6 +6,7 @@ import java.net.URL;
|
|||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import org.apache.commons.lang.StringUtils;
|
||||||
import org.json.JSONArray;
|
import org.json.JSONArray;
|
||||||
import org.json.JSONObject;
|
import org.json.JSONObject;
|
||||||
import org.jsoup.Jsoup;
|
import org.jsoup.Jsoup;
|
||||||
@ -27,11 +28,13 @@ public class TumblrRipper extends AlbumRipper {
|
|||||||
private ALBUM_TYPE albumType;
|
private ALBUM_TYPE albumType;
|
||||||
private String subdomain, tagName, postNumber;
|
private String subdomain, tagName, postNumber;
|
||||||
|
|
||||||
private final String API_KEY;
|
private static final String API_KEY;
|
||||||
|
static {
|
||||||
|
API_KEY = Utils.getConfigString("tumblr.auth", null);
|
||||||
|
}
|
||||||
|
|
||||||
public TumblrRipper(URL url) throws IOException {
|
public TumblrRipper(URL url) throws IOException {
|
||||||
super(url);
|
super(url);
|
||||||
API_KEY = Utils.getConfigString("tumblr.auth", null);
|
|
||||||
if (API_KEY == null) {
|
if (API_KEY == null) {
|
||||||
throw new IOException("Could not find tumblr authentication key in configuration");
|
throw new IOException("Could not find tumblr authentication key in configuration");
|
||||||
}
|
}
|
||||||
@ -44,9 +47,39 @@ public class TumblrRipper extends AlbumRipper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||||
|
String u = url.toExternalForm();
|
||||||
|
// Convert <FQDN>.tumblr.com/path to <FQDN>/path if needed
|
||||||
|
if (StringUtils.countMatches(u, ".") > 2) {
|
||||||
|
url = new URL(u.replace(".tumblr.com", ""));
|
||||||
|
if (isTumblrURL(url)) {
|
||||||
|
logger.info("Detected tumblr site: " + url);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
logger.info("Not a tumblr site: " + url);
|
||||||
|
}
|
||||||
|
}
|
||||||
return url;
|
return url;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean isTumblrURL(URL url) {
|
||||||
|
String checkURL = "http://api.tumblr.com/v2/blog/";
|
||||||
|
checkURL += url.getHost();
|
||||||
|
checkURL += "/info?api_key=" + API_KEY;
|
||||||
|
try {
|
||||||
|
Document doc = Jsoup.connect(checkURL)
|
||||||
|
.ignoreContentType(true)
|
||||||
|
.userAgent(USER_AGENT)
|
||||||
|
.get();
|
||||||
|
String jsonString = doc.body().html().replaceAll("&quot;", "\"");
|
||||||
|
JSONObject json = new JSONObject(jsonString);
|
||||||
|
int status = json.getJSONObject("meta").getInt("status");
|
||||||
|
return status == 200;
|
||||||
|
} catch (IOException e) {
|
||||||
|
logger.error("Error while checking possible tumblr domain: " + url.getHost(), e);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void rip() throws IOException {
|
public void rip() throws IOException {
|
||||||
String[] mediaTypes;
|
String[] mediaTypes;
|
||||||
@ -59,6 +92,9 @@ public class TumblrRipper extends AlbumRipper {
|
|||||||
for (String mediaType : mediaTypes) {
|
for (String mediaType : mediaTypes) {
|
||||||
offset = 0;
|
offset = 0;
|
||||||
while (true) {
|
while (true) {
|
||||||
|
if (isStopped()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
String apiURL = getTumblrApiURL(mediaType, offset);
|
String apiURL = getTumblrApiURL(mediaType, offset);
|
||||||
logger.info(" Retrieving " + apiURL);
|
logger.info(" Retrieving " + apiURL);
|
||||||
Document doc = Jsoup.connect(apiURL)
|
Document doc = Jsoup.connect(apiURL)
|
||||||
@ -79,6 +115,9 @@ public class TumblrRipper extends AlbumRipper {
|
|||||||
}
|
}
|
||||||
offset += 20;
|
offset += 20;
|
||||||
}
|
}
|
||||||
|
if (isStopped()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
waitForThreads();
|
waitForThreads();
|
||||||
}
|
}
|
||||||
@ -134,7 +173,7 @@ public class TumblrRipper extends AlbumRipper {
|
|||||||
if (albumType == ALBUM_TYPE.POST) {
|
if (albumType == ALBUM_TYPE.POST) {
|
||||||
sb.append("http://api.tumblr.com/v2/blog/")
|
sb.append("http://api.tumblr.com/v2/blog/")
|
||||||
.append(subdomain)
|
.append(subdomain)
|
||||||
.append(".tumblr.com/posts?id=")
|
.append("/posts?id=")
|
||||||
.append(postNumber)
|
.append(postNumber)
|
||||||
.append("&api_key=")
|
.append("&api_key=")
|
||||||
.append(API_KEY);
|
.append(API_KEY);
|
||||||
@ -142,7 +181,7 @@ public class TumblrRipper extends AlbumRipper {
|
|||||||
}
|
}
|
||||||
sb.append("http://api.tumblr.com/v2/blog/")
|
sb.append("http://api.tumblr.com/v2/blog/")
|
||||||
.append(subdomain)
|
.append(subdomain)
|
||||||
.append(".tumblr.com/posts/")
|
.append("/posts/")
|
||||||
.append(mediaType)
|
.append(mediaType)
|
||||||
.append("?api_key=")
|
.append("?api_key=")
|
||||||
.append(API_KEY)
|
.append(API_KEY)
|
||||||
@ -162,10 +201,13 @@ public class TumblrRipper extends AlbumRipper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getGID(URL url) throws MalformedURLException {
|
public String getGID(URL url) throws MalformedURLException {
|
||||||
|
final String DOMAIN_REGEX = "^https?://([a-zA-Z0-9\\-\\.]+)";
|
||||||
|
|
||||||
Pattern p;
|
Pattern p;
|
||||||
Matcher m;
|
Matcher m;
|
||||||
|
|
||||||
// Tagged URL
|
// Tagged URL
|
||||||
p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.tumblr\\.com/tagged/([a-zA-Z0-9\\-%]{1,}).*$");
|
p = Pattern.compile(DOMAIN_REGEX + "/tagged/([a-zA-Z0-9\\-%]+).*$");
|
||||||
m = p.matcher(url.toExternalForm());
|
m = p.matcher(url.toExternalForm());
|
||||||
if (m.matches()) {
|
if (m.matches()) {
|
||||||
this.albumType = ALBUM_TYPE.TAG;
|
this.albumType = ALBUM_TYPE.TAG;
|
||||||
@ -175,7 +217,7 @@ public class TumblrRipper extends AlbumRipper {
|
|||||||
return this.subdomain + "_tag_" + this.tagName.replace("%20", " ");
|
return this.subdomain + "_tag_" + this.tagName.replace("%20", " ");
|
||||||
}
|
}
|
||||||
// Post URL
|
// Post URL
|
||||||
p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.tumblr\\.com/post/([0-9]{1,}).*$");
|
p = Pattern.compile(DOMAIN_REGEX + "/post/([0-9]+).*$");
|
||||||
m = p.matcher(url.toExternalForm());
|
m = p.matcher(url.toExternalForm());
|
||||||
if (m.matches()) {
|
if (m.matches()) {
|
||||||
this.albumType = ALBUM_TYPE.POST;
|
this.albumType = ALBUM_TYPE.POST;
|
||||||
@ -184,15 +226,14 @@ public class TumblrRipper extends AlbumRipper {
|
|||||||
return this.subdomain + "_post_" + this.postNumber;
|
return this.subdomain + "_post_" + this.postNumber;
|
||||||
}
|
}
|
||||||
// Subdomain-level URL
|
// Subdomain-level URL
|
||||||
p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.tumblr\\.com/?.*$");
|
p = Pattern.compile(DOMAIN_REGEX + ".*$");
|
||||||
m = p.matcher(url.toExternalForm());
|
m = p.matcher(url.toExternalForm());
|
||||||
if (m.matches()) {
|
if (m.matches()) {
|
||||||
this.albumType = ALBUM_TYPE.SUBDOMAIN;
|
this.albumType = ALBUM_TYPE.SUBDOMAIN;
|
||||||
this.subdomain = m.group(1);
|
this.subdomain = m.group(1);
|
||||||
return this.subdomain;
|
return this.subdomain;
|
||||||
}
|
}
|
||||||
// TODO support non-tumblr.com domains
|
throw new MalformedURLException("Expected format: http://subdomain[.tumblr.com][/tagged/tag|/post/postno]");
|
||||||
throw new MalformedURLException("Expected format: http://user.tumblr.com[/tagged/tag|/post/postno]");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -748,8 +748,8 @@ public class MainWindow implements Runnable, RipStatusHandler {
|
|||||||
ripper.setup();
|
ripper.setup();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
failed = true;
|
failed = true;
|
||||||
logger.error("Could not find ripper for URL " + url);
|
logger.error("Could not find ripper for URL " + url, e);
|
||||||
error("Could not find ripper for given URL");
|
error("Error: " + e.getMessage());
|
||||||
}
|
}
|
||||||
if (!failed) {
|
if (!failed) {
|
||||||
try {
|
try {
|
||||||
|
@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils;
|
|||||||
public class UpdateUtils {
|
public class UpdateUtils {
|
||||||
|
|
||||||
private static final Logger logger = Logger.getLogger(UpdateUtils.class);
|
private static final Logger logger = Logger.getLogger(UpdateUtils.class);
|
||||||
private static final String DEFAULT_VERSION = "1.0.66";
|
private static final String DEFAULT_VERSION = "1.0.67";
|
||||||
private static final String updateJsonURL = "http://rarchives.com/ripme.json";
|
private static final String updateJsonURL = "http://rarchives.com/ripme.json";
|
||||||
private static final String updateJarURL = "http://rarchives.com/ripme.jar";
|
private static final String updateJarURL = "http://rarchives.com/ripme.jar";
|
||||||
private static final String mainFileName = "ripme.jar";
|
private static final String mainFileName = "ripme.jar";
|
||||||
|
Loading…
Reference in New Issue
Block a user