Added retry logic for downloads, imagearn ripper

This commit is contained in:
4pr0n 2014-02-27 20:25:16 -08:00
parent 569b7fbdc4
commit 25232c0ab3
7 changed files with 124 additions and 22 deletions

View File

@ -7,6 +7,7 @@ import java.net.URL;
import org.apache.log4j.Logger;
import com.rarchives.ripme.ripper.rippers.ImagearnRipper;
import com.rarchives.ripme.ripper.rippers.ImagefapRipper;
import com.rarchives.ripme.ripper.rippers.ImgurRipper;
import com.rarchives.ripme.utils.Utils;
@ -109,6 +110,10 @@ public abstract class AbstractRipper implements RipperInterface {
AbstractRipper r = new ImgurRipper(url);
return r;
} catch (IOException e) { }
try {
AbstractRipper r = new ImagearnRipper(url);
return r;
} catch (IOException e) { }
throw new Exception("No compatible ripper found");
}
}

View File

@ -17,11 +17,13 @@ public class DownloadFileThread extends Thread {
private URL url;
private File saveAs;
private int retries;
/**
 * Builds a thread that downloads a single URL into a single local file.
 *
 * @param url    remote location that will be fetched
 * @param saveAs local file the fetched bytes are written to
 */
public DownloadFileThread(URL url, File saveAs) {
    // Thread's no-argument constructor is invoked implicitly.
    this.saveAs = saveAs;
    this.url = url;
    // Retry limit comes from the "download.retries" setting; 1 is handed to
    // getConfigInteger as its second argument (presumably the fallback when
    // the key is missing -- confirm against Utils).
    this.retries = Utils.getConfigInteger("download.retries", 1);
}
public void run() {
@ -36,19 +38,27 @@ public class DownloadFileThread extends Thread {
}
}
logger.info("[ ] Downloading file from: " + url);
try {
Response response;
response = Jsoup.connect(url.toExternalForm())
.ignoreContentType(true)
.execute();
FileOutputStream out = (new FileOutputStream(saveAs));
out.write(response.bodyAsBytes());
out.close();
} catch (IOException e) {
logger.error("[!] Exception while downloading file: " + url, e);
return;
}
int tries = 0; // Number of download attempts made so far
// Keep attempting the download until it succeeds or the retry budget is
// used up. The first attempt is not counted as a retry, so at most
// (this.retries + 1) attempts are made in total.
do {
    try {
        logger.info("[ ] Downloading file from: " + url + (tries > 0 ? " Retry #" + tries : ""));
        tries += 1;
        Response response = Jsoup.connect(url.toExternalForm())
                .ignoreContentType(true) // accept images/binaries, not only HTML
                .execute();
        FileOutputStream out = new FileOutputStream(saveAs);
        try {
            out.write(response.bodyAsBytes());
        } finally {
            // Always release the file handle, even when the write fails;
            // otherwise every failed attempt would leak an open stream.
            out.close();
        }
        break; // Download successful: leave the retry loop
    } catch (IOException e) {
        logger.error("[!] Exception while downloading file: " + url + " - " + e.getMessage());
    }
    if (tries > this.retries) {
        logger.error("[!] Exceeded maximum retries (" + this.retries + ") for URL " + url);
        return;
    }
} while (true);
logger.info("[+] Download completed: " + url);
}

View File

@ -24,6 +24,7 @@ public class DownloadThreadPool {
}
public void waitForThreads() {
logger.info("[ ] Waiting for threads to finish...");
threadPool.shutdown();
try {
threadPool.awaitTermination(60, TimeUnit.SECONDS);

View File

@ -0,0 +1,92 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractRipper;
/**
 * Ripper for imagearn.com galleries.
 *
 * Accepts either a gallery link (gallery.php?id=N) or a link to a single
 * image page (image.php?id=N); in the latter case the image page is fetched
 * to discover the gallery it belongs to.
 */
public class ImagearnRipper extends AbstractRipper {

    private static final String DOMAIN = "imagearn.com",
                                HOST   = "imagearn";
    private static final Logger logger = Logger.getLogger(ImagearnRipper.class);

    // Compiled once instead of on every call. Kept static so they are already
    // initialised should the superclass constructor invoke sanitizeURL/getGID
    // (not visible from this file).

    /** Matches a link to a single image page (image.php?id=N). */
    private static final Pattern IMAGE_URL_PATTERN =
            Pattern.compile("^.*imagearn\\.com/+image\\.php\\?id=[0-9]+.*$");

    /** Matches a gallery link (gallery.php?id=N); group 1 is the numeric gallery id. */
    private static final Pattern GALLERY_URL_PATTERN =
            Pattern.compile("^.*imagearn\\.com/+gallery\\.php\\?id=([0-9]+).*$");

    /**
     * @param url gallery or image-page URL to rip
     * @throws IOException propagated from the superclass constructor
     */
    public ImagearnRipper(URL url) throws IOException {
        super(url);
    }

    /**
     * @param url candidate URL
     * @return true when the URL's host ends with "imagearn.com"
     */
    public boolean canRip(URL url) {
        return url.getHost().endsWith(DOMAIN);
    }

    /**
     * Converts a single-image link into the link of its gallery.
     * Gallery links (and anything else) are returned untouched.
     *
     * @param url URL supplied by the user
     * @return the gallery URL when it could be resolved, otherwise the input URL
     * @throws MalformedURLException declared for the interface; not thrown directly here
     */
    public URL sanitizeURL(URL url) throws MalformedURLException {
        if (IMAGE_URL_PATTERN.matcher(url.toExternalForm()).matches()) {
            // URL points to imagearn *image*, not gallery
            try {
                url = getGalleryFromImage(url);
            } catch (Exception e) {
                // Best effort: log and fall through with the original URL.
                logger.error("[!] " + e.getMessage(), e);
            }
        }
        return url;
    }

    /**
     * Loads an image page and returns the gallery it links to.
     *
     * @param url URL of a single image page
     * @return URL of the first gallery linked from that page
     * @throws IOException when the page cannot be fetched or has no gallery link
     */
    private URL getGalleryFromImage(URL url) throws IOException {
        Document doc = Jsoup.connect(url.toExternalForm()).get();
        // The selector only yields anchors whose href starts with
        // "gallery.php", so no further attribute checks are required.
        Element link = doc.select("a[href~=^gallery\\.php.*$]").first();
        if (link != null) {
            logger.info("LINK: " + link.toString());
            url = new URL("http://imagearn.com/" + link.attr("href"));
            logger.info("[!] Found gallery from given link: " + url);
            return url;
        }
        throw new IOException("Failed to find gallery at URL " + url);
    }

    /**
     * Fetches the gallery page, queues every thumbnail's full-size image for
     * download (prefixed with a zero-padded running index) and then waits for
     * the download threads.
     *
     * @throws IOException when the gallery page cannot be fetched or an image
     *                     URL is malformed
     */
    @Override
    public void rip() throws IOException {
        int index = 0;
        logger.info("[ ] Retrieving " + this.url.toExternalForm());
        Document doc = Jsoup.connect(url.toExternalForm()).get();
        for (Element thumb : doc.select("img.border")) {
            String image = thumb.attr("src");
            // Rewrite the thumbnail host to the full-size image location.
            // NOTE(review): "imags" is reproduced verbatim from the original;
            // confirm it is the site's real path and not a typo.
            image = image.replaceAll("thumbs[0-9]*\\.imagearn\\.com/", "img.imagearn.com/imags/");
            index += 1;
            addURLToDownload(new URL(image), String.format("%03d_", index));
        }
        threadPool.waitForThreads();
    }

    /** @return the short host name, "imagearn" */
    @Override
    public String getHost() {
        return HOST;
    }

    /**
     * Extracts the numeric gallery id from a gallery URL.
     *
     * @param url gallery URL of the form imagearn.com/gallery.php?id=N
     * @return the digits following "id="
     * @throws MalformedURLException when the URL is not a gallery URL
     */
    @Override
    public String getGID(URL url) throws MalformedURLException {
        Matcher m = GALLERY_URL_PATTERN.matcher(url.toExternalForm());
        if (m.matches()) {
            return m.group(1);
        }
        throw new MalformedURLException(
                "Expected imagearn.com gallery formats: "
                        + "imagearn.com/gallery.php?id=####..."
                        + " Got: " + url);
    }
}

View File

@ -71,17 +71,11 @@ public class ImagefapRipper extends AbstractRipper {
"http://x.*.fap.to/images/thumb/",
"http://fap.to/images/full/");
index += 1;
processURL(new URL(image), String.format("%03d_", index));
addURLToDownload(new URL(image), String.format("%03d_", index));
}
logger.info("[ ] Waiting for threads to finish...");
threadPool.waitForThreads();
}
/**
 * Logs a discovered URL at debug level and queues it for download.
 *
 * @param url    address of the file to download
 * @param prefix handed unchanged to addURLToDownload; the visible call site
 *               supplies a zero-padded index such as "001_"
 */
public void processURL(URL url, String prefix) {
    final String message = "Found URL: " + url;
    logger.debug(message);
    addURLToDownload(url, prefix);
}
public boolean canRip(URL url) {
if (!url.getHost().endsWith(DOMAIN)) {
return false;

View File

@ -75,7 +75,6 @@ public class ImgurRipper extends AbstractRipper {
// TODO Get all albums by user
break;
}
logger.info("[ ] Waiting for threads to finish...");
threadPool.waitForThreads();
}

View File

@ -1,2 +1,3 @@
threads.size = 5
file.overwrite = false
file.overwrite = false
download.retries = 3