From 25232c0ab39f2121baecdd2388d72b81a9b61eaa Mon Sep 17 00:00:00 2001 From: 4pr0n Date: Thu, 27 Feb 2014 20:25:16 -0800 Subject: [PATCH] Added retry logic for downloads, imagearn ripper --- .../ripme/ripper/AbstractRipper.java | 5 + .../ripme/ripper/DownloadFileThread.java | 36 +++++--- .../ripme/ripper/DownloadThreadPool.java | 1 + .../ripme/ripper/rippers/ImagearnRipper.java | 92 +++++++++++++++++++ .../ripme/ripper/rippers/ImagefapRipper.java | 8 +- .../ripme/ripper/rippers/ImgurRipper.java | 1 - src/main/resources/rip.properties | 3 +- 7 files changed, 124 insertions(+), 22 deletions(-) create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/ImagearnRipper.java diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java index 42515984..d398d187 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java @@ -7,6 +7,7 @@ import java.net.URL; import org.apache.log4j.Logger; +import com.rarchives.ripme.ripper.rippers.ImagearnRipper; import com.rarchives.ripme.ripper.rippers.ImagefapRipper; import com.rarchives.ripme.ripper.rippers.ImgurRipper; import com.rarchives.ripme.utils.Utils; @@ -109,6 +110,10 @@ public abstract class AbstractRipper implements RipperInterface { AbstractRipper r = new ImgurRipper(url); return r; } catch (IOException e) { } + try { + AbstractRipper r = new ImagearnRipper(url); + return r; + } catch (IOException e) { } throw new Exception("No compatible ripper found"); } } \ No newline at end of file diff --git a/src/main/java/com/rarchives/ripme/ripper/DownloadFileThread.java b/src/main/java/com/rarchives/ripme/ripper/DownloadFileThread.java index 20287982..820a12b3 100644 --- a/src/main/java/com/rarchives/ripme/ripper/DownloadFileThread.java +++ b/src/main/java/com/rarchives/ripme/ripper/DownloadFileThread.java @@ -17,11 +17,13 @@ public class DownloadFileThread extends Thread { private URL url; private File saveAs; + private int retries; public DownloadFileThread(URL url, File saveAs) { super(); this.url = url; this.saveAs = saveAs; + this.retries = Utils.getConfigInteger("download.retries", 1); } public void run() { @@ -36,19 +38,27 @@ public class DownloadFileThread extends Thread { } } - logger.info("[ ] Downloading file from: " + url); - try { - Response response; - response = Jsoup.connect(url.toExternalForm()) - .ignoreContentType(true) - .execute(); - FileOutputStream out = (new FileOutputStream(saveAs)); - out.write(response.bodyAsBytes()); - out.close(); - } catch (IOException e) { - logger.error("[!] Exception while downloading file: " + url, e); - return; - } + int tries = 0; // Number of attempts to download + do { + try { + logger.info("[ ] Downloading file from: " + url + (tries > 0 ? " Retry #" + tries : "")); + tries += 1; + Response response; + response = Jsoup.connect(url.toExternalForm()) + .ignoreContentType(true) + .execute(); + FileOutputStream out = (new FileOutputStream(saveAs)); + out.write(response.bodyAsBytes()); + out.close(); + break; // Download successful: break out of infinite loop + } catch (IOException e) { + logger.error("[!] Exception while downloading file: " + url + " - " + e.getMessage()); + } + if (tries > this.retries) { + logger.error("[!] Exceeded maximum retries (" + this.retries + ") for URL " + url); + return; + } + } while (true); logger.info("[+] Download completed: " + url); } diff --git a/src/main/java/com/rarchives/ripme/ripper/DownloadThreadPool.java b/src/main/java/com/rarchives/ripme/ripper/DownloadThreadPool.java index e0a93404..1c5a0ac6 100644 --- a/src/main/java/com/rarchives/ripme/ripper/DownloadThreadPool.java +++ b/src/main/java/com/rarchives/ripme/ripper/DownloadThreadPool.java @@ -24,6 +24,7 @@ public class DownloadThreadPool { } public void waitForThreads() { + logger.info("[ ] Waiting for threads to finish..."); threadPool.shutdown(); try { threadPool.awaitTermination(60, TimeUnit.SECONDS); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagearnRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagearnRipper.java new file mode 100644 index 00000000..fce7d6d8 --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagearnRipper.java @@ -0,0 +1,92 @@ +package com.rarchives.ripme.ripper.rippers; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.log4j.Logger; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + +import com.rarchives.ripme.ripper.AbstractRipper; + +public class ImagearnRipper extends AbstractRipper { + + private static final String DOMAIN = "imagearn.com", + HOST = "imagearn"; + private static final Logger logger = Logger.getLogger(ImagearnRipper.class); + + public ImagearnRipper(URL url) throws IOException { + super(url); + } + + public boolean canRip(URL url) { + if (!url.getHost().endsWith(DOMAIN)) { + return false; + } + return true; + } + + public URL sanitizeURL(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("^.*imagearn.com/{1,}image.php\\?id=[0-9]{1,}.*$"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + // URL points to imagearn *image*, not gallery + try { + url = getGalleryFromImage(url); + } catch (Exception e) { + logger.error("[!] " + e.getMessage(), e); + } + } + return url; + } + + private URL getGalleryFromImage(URL url) throws IOException { + Document doc = Jsoup.connect(url.toExternalForm()).get(); + for (Element link : doc.select("a[href~=^gallery\\.php.*$]")) { + logger.info("LINK: " + link.toString()); + if (link.hasAttr("href") + && link.attr("href").contains("gallery.php")) { + url = new URL("http://imagearn.com/" + link.attr("href")); + logger.info("[!] Found gallery from given link: " + url); + return url; + } + } + throw new IOException("Failed to find gallery at URL " + url); + } + + @Override + public void rip() throws IOException { + int index = 0; + logger.info("[ ] Retrieving " + this.url.toExternalForm()); + Document doc = Jsoup.connect(url.toExternalForm()).get(); + for (Element thumb : doc.select("img.border")) { + String image = thumb.attr("src"); + image = image.replaceAll("thumbs[0-9]*\\.imagearn\\.com/", "img.imagearn.com/imags/"); + index += 1; + addURLToDownload(new URL(image), String.format("%03d_", index)); + } + threadPool.waitForThreads(); + } + + @Override + public String getHost() { + return HOST; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("^.*imagearn.com/{1,}gallery.php\\?id=([0-9]{1,}).*$"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1); + } + throw new MalformedURLException( + "Expected imagearn.com gallery formats: " + + "imagearn.com/gallery.php?id=####..." + + " Got: " + url); + } +} diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java index 73fa558e..baadf5ff 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java @@ -71,17 +71,11 @@ public class ImagefapRipper extends AbstractRipper { "http://x.*.fap.to/images/thumb/", "http://fap.to/images/full/"); index += 1; - processURL(new URL(image), String.format("%03d_", index)); + addURLToDownload(new URL(image), String.format("%03d_", index)); } - logger.info("[ ] Waiting for threads to finish..."); threadPool.waitForThreads(); } - public void processURL(URL url, String prefix) { - logger.debug("Found URL: " + url); - addURLToDownload(url, prefix); - } - public boolean canRip(URL url) { if (!url.getHost().endsWith(DOMAIN)) { return false; diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java index 15ed1551..2e8a5406 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java @@ -75,7 +75,6 @@ public class ImgurRipper extends AbstractRipper { // TODO Get all albums by user break; } - logger.info("[ ] Waiting for threads to finish..."); threadPool.waitForThreads(); } diff --git a/src/main/resources/rip.properties b/src/main/resources/rip.properties index c28c89f5..c0c4c214 100644 --- a/src/main/resources/rip.properties +++ b/src/main/resources/rip.properties @@ -1,2 +1,3 @@ threads.size = 5 -file.overwrite = false \ No newline at end of file +file.overwrite = false +download.retries = 3 \ No newline at end of file