Added retry logic for downloads, imagearn ripper

This commit is contained in:
4pr0n 2014-02-27 20:25:16 -08:00
parent 569b7fbdc4
commit 25232c0ab3
7 changed files with 124 additions and 22 deletions

View File

@ -7,6 +7,7 @@ import java.net.URL;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import com.rarchives.ripme.ripper.rippers.ImagearnRipper;
import com.rarchives.ripme.ripper.rippers.ImagefapRipper; import com.rarchives.ripme.ripper.rippers.ImagefapRipper;
import com.rarchives.ripme.ripper.rippers.ImgurRipper; import com.rarchives.ripme.ripper.rippers.ImgurRipper;
import com.rarchives.ripme.utils.Utils; import com.rarchives.ripme.utils.Utils;
@ -109,6 +110,10 @@ public abstract class AbstractRipper implements RipperInterface {
AbstractRipper r = new ImgurRipper(url); AbstractRipper r = new ImgurRipper(url);
return r; return r;
} catch (IOException e) { } } catch (IOException e) { }
try {
AbstractRipper r = new ImagearnRipper(url);
return r;
} catch (IOException e) { }
throw new Exception("No compatible ripper found"); throw new Exception("No compatible ripper found");
} }
} }

View File

@ -17,11 +17,13 @@ public class DownloadFileThread extends Thread {
private URL url; private URL url;
private File saveAs; private File saveAs;
private int retries;
public DownloadFileThread(URL url, File saveAs) { public DownloadFileThread(URL url, File saveAs) {
super(); super();
this.url = url; this.url = url;
this.saveAs = saveAs; this.saveAs = saveAs;
this.retries = Utils.getConfigInteger("download.retries", 1);
} }
public void run() { public void run() {
@ -36,8 +38,11 @@ public class DownloadFileThread extends Thread {
} }
} }
logger.info("[ ] Downloading file from: " + url); int tries = 0; // Number of attempts to download
do {
try { try {
logger.info("[ ] Downloading file from: " + url + (tries > 0 ? " Retry #" + tries : ""));
tries += 1;
Response response; Response response;
response = Jsoup.connect(url.toExternalForm()) response = Jsoup.connect(url.toExternalForm())
.ignoreContentType(true) .ignoreContentType(true)
@ -45,10 +50,15 @@ public class DownloadFileThread extends Thread {
FileOutputStream out = (new FileOutputStream(saveAs)); FileOutputStream out = (new FileOutputStream(saveAs));
out.write(response.bodyAsBytes()); out.write(response.bodyAsBytes());
out.close(); out.close();
break; // Download successful: break out of infinite loop
} catch (IOException e) { } catch (IOException e) {
logger.error("[!] Exception while downloading file: " + url, e); logger.error("[!] Exception while downloading file: " + url + " - " + e.getMessage());
}
if (tries > this.retries) {
logger.error("[!] Exceeded maximum retries (" + this.retries + ") for URL " + url);
return; return;
} }
} while (true);
logger.info("[+] Download completed: " + url); logger.info("[+] Download completed: " + url);
} }

View File

@ -24,6 +24,7 @@ public class DownloadThreadPool {
} }
public void waitForThreads() { public void waitForThreads() {
logger.info("[ ] Waiting for threads to finish...");
threadPool.shutdown(); threadPool.shutdown();
try { try {
threadPool.awaitTermination(60, TimeUnit.SECONDS); threadPool.awaitTermination(60, TimeUnit.SECONDS);

View File

@ -0,0 +1,92 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractRipper;
/**
 * Ripper for imagearn.com galleries.
 *
 * Accepts either a gallery URL (gallery.php?id=N) or a single-image URL
 * (image.php?id=N); in the latter case the image page is fetched and the
 * gallery it links to is used instead.
 */
public class ImagearnRipper extends AbstractRipper {

    private static final String DOMAIN = "imagearn.com",
                                HOST   = "imagearn";
    private static final Logger logger = Logger.getLogger(ImagearnRipper.class);

    // Patterns are compiled once and shared. Literal dots are escaped so that
    // e.g. "imagearnXcom/galleryXphp" is no longer accepted by accident.
    // They are static so they are initialised at class-load time, before any
    // instance method can run (NOTE(review): presumably the superclass
    // constructor invokes sanitizeURL/getGID -- confirm in AbstractRipper).

    /** Matches a URL pointing at a single image page (image.php?id=N). */
    private static final Pattern IMAGE_PAGE_PATTERN =
            Pattern.compile("^.*imagearn\\.com/+image\\.php\\?id=[0-9]+.*$");

    /** Matches a gallery URL; group 1 captures the numeric gallery id. */
    private static final Pattern GALLERY_PATTERN =
            Pattern.compile("^.*imagearn\\.com/+gallery\\.php\\?id=([0-9]+).*$");

    /** Matches the thumbnail host prefix ("thumbs", optional digits) inside an image src. */
    private static final Pattern THUMB_HOST_PATTERN =
            Pattern.compile("thumbs[0-9]*\\.imagearn\\.com/");

    /**
     * @param url the URL to rip; forwarded unchanged to the superclass constructor
     * @throws IOException propagated from the superclass constructor
     */
    public ImagearnRipper(URL url) throws IOException {
        super(url);
    }

    /**
     * @param url candidate URL
     * @return true when the URL's host ends with "imagearn.com"
     */
    public boolean canRip(URL url) {
        return url.getHost().endsWith(DOMAIN);
    }

    /**
     * Converts a single-image URL into the URL of its gallery.
     * URLs that are not image-page URLs are returned untouched. If the
     * gallery lookup fails the error is logged and the original URL is
     * returned (best effort).
     *
     * @param url URL supplied by the user
     * @return the gallery URL when one could be resolved, otherwise {@code url}
     * @throws MalformedURLException declared by the signature; not thrown by this implementation
     */
    public URL sanitizeURL(URL url) throws MalformedURLException {
        if (IMAGE_PAGE_PATTERN.matcher(url.toExternalForm()).matches()) {
            // URL points to imagearn *image*, not gallery
            try {
                url = getGalleryFromImage(url);
            } catch (Exception e) {
                // Deliberately best-effort: keep the original URL on failure.
                logger.error("[!] " + e.getMessage(), e);
            }
        }
        return url;
    }

    /**
     * Fetches an image page and returns the gallery it links to.
     *
     * @param url URL of an image page
     * @return absolute URL of the first gallery link found on the page
     * @throws IOException if the page cannot be fetched or has no gallery link
     */
    private URL getGalleryFromImage(URL url) throws IOException {
        Document doc = Jsoup.connect(url.toExternalForm()).get();
        // The selector only yields anchors whose href starts with
        // "gallery.php", so no further attribute checks are required.
        Element link = doc.select("a[href~=^gallery\\.php.*$]").first();
        if (link == null) {
            throw new IOException("Failed to find gallery at URL " + url);
        }
        logger.info("LINK: " + link.toString());
        URL gallery = new URL("http://imagearn.com/" + link.attr("href"));
        logger.info("[!] Found gallery from given link: " + gallery);
        return gallery;
    }

    /**
     * Loads the gallery page, queues every thumbnail's corresponding image
     * for download with a zero-padded index prefix ("001_", "002_", ...),
     * then waits for the download threads.
     *
     * @throws IOException if the gallery page cannot be fetched or an image
     *                     src does not form a valid URL
     */
    @Override
    public void rip() throws IOException {
        int index = 0;
        logger.info("[ ] Retrieving " + this.url.toExternalForm());
        Document doc = Jsoup.connect(url.toExternalForm()).get();
        for (Element thumb : doc.select("img.border")) {
            // Rewrite the thumbnail host prefix to the image host and path.
            String image = THUMB_HOST_PATTERN
                    .matcher(thumb.attr("src"))
                    .replaceAll("img.imagearn.com/imags/");
            index += 1;
            addURLToDownload(new URL(image), String.format("%03d_", index));
        }
        threadPool.waitForThreads();
    }

    /** @return the short host name, "imagearn" */
    @Override
    public String getHost() {
        return HOST;
    }

    /**
     * Extracts the numeric gallery id from a gallery URL.
     *
     * @param url gallery URL of the form imagearn.com/gallery.php?id=N
     * @return the digits following "id="
     * @throws MalformedURLException if the URL is not a gallery URL
     */
    @Override
    public String getGID(URL url) throws MalformedURLException {
        Matcher m = GALLERY_PATTERN.matcher(url.toExternalForm());
        if (m.matches()) {
            return m.group(1);
        }
        throw new MalformedURLException(
                "Expected imagearn.com gallery formats: "
                        + "imagearn.com/gallery.php?id=####..."
                        + " Got: " + url);
    }
}

View File

@ -71,17 +71,11 @@ public class ImagefapRipper extends AbstractRipper {
"http://x.*.fap.to/images/thumb/", "http://x.*.fap.to/images/thumb/",
"http://fap.to/images/full/"); "http://fap.to/images/full/");
index += 1; index += 1;
processURL(new URL(image), String.format("%03d_", index)); addURLToDownload(new URL(image), String.format("%03d_", index));
} }
logger.info("[ ] Waiting for threads to finish...");
threadPool.waitForThreads(); threadPool.waitForThreads();
} }
public void processURL(URL url, String prefix) {
logger.debug("Found URL: " + url);
addURLToDownload(url, prefix);
}
public boolean canRip(URL url) { public boolean canRip(URL url) {
if (!url.getHost().endsWith(DOMAIN)) { if (!url.getHost().endsWith(DOMAIN)) {
return false; return false;

View File

@ -75,7 +75,6 @@ public class ImgurRipper extends AbstractRipper {
// TODO Get all albums by user // TODO Get all albums by user
break; break;
} }
logger.info("[ ] Waiting for threads to finish...");
threadPool.waitForThreads(); threadPool.waitForThreads();
} }

View File

@ -1,2 +1,3 @@
threads.size = 5 threads.size = 5
file.overwrite = false file.overwrite = false
download.retries = 3