Fixed Tumblr URL redirect

The Tumblr ripper did not account for potential non-image URLs in the
original_size field. This is caused by larger images being sent to AWS.
If the URL has no image extension, I now get the response’s URL and download the
file from AWS.
This commit is contained in:
CodeDestroyer 2014-07-31 23:49:56 -04:00
parent da8871ec57
commit 0499cd674c

View File

@ -5,7 +5,6 @@ import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.json.JSONArray;
import org.json.JSONObject;
@ -18,7 +17,8 @@ import com.rarchives.ripme.utils.Utils;
public class TumblrRipper extends AlbumRipper {
private static final String DOMAIN = "tumblr.com",
HOST = "tumblr";
HOST = "tumblr",
IMAGE_PATTERN = "([^\\s]+(\\.(?i)(jpg|png|gif|bmp))$)";
private enum ALBUM_TYPE {
SUBDOMAIN,
@ -120,6 +120,10 @@ public class TumblrRipper extends AlbumRipper {
private boolean handleJSON(JSONObject json) {
JSONArray posts, photos;
JSONObject post, photo;
Pattern p;
Matcher m;
p = Pattern.compile(IMAGE_PATTERN);
URL fileURL;
posts = json.getJSONObject("response").getJSONArray("posts");
@ -136,7 +140,13 @@ public class TumblrRipper extends AlbumRipper {
photo = photos.getJSONObject(j);
try {
fileURL = new URL(photo.getJSONObject("original_size").getString("url"));
addURLToDownload(fileURL);
m = p.matcher(fileURL.toString());
if(m.matches()) {
addURLToDownload(fileURL);
} else{
URL redirectedURL = Http.url(fileURL).ignoreContentType().response().url();
addURLToDownload(redirectedURL);
}
} catch (Exception e) {
logger.error("[!] Error while parsing photo in " + photo, e);
continue;