diff --git a/src/main/java/com/rarchives/ripme/ripper/DownloadFileThread.java b/src/main/java/com/rarchives/ripme/ripper/DownloadFileThread.java index 1793e99e..7a1730f7 100644 --- a/src/main/java/com/rarchives/ripme/ripper/DownloadFileThread.java +++ b/src/main/java/com/rarchives/ripme/ripper/DownloadFileThread.java @@ -1,15 +1,18 @@ package com.rarchives.ripme.ripper; +import java.io.BufferedInputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.HttpURLConnection; import java.net.URL; import java.util.HashMap; import java.util.Map; +import org.apache.commons.io.IOUtils; import org.apache.log4j.Logger; -import org.jsoup.Connection.Response; -import org.jsoup.Jsoup; import org.jsoup.HttpStatusException; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; @@ -33,7 +36,6 @@ public class DownloadFileThread extends Thread { private int retries; private final int TIMEOUT; - private final int MAX_BODY_SIZE; public DownloadFileThread(URL url, File saveAs, AbstractRipper observer) { super(); @@ -43,7 +45,6 @@ public class DownloadFileThread extends Thread { this.observer = observer; this.retries = Utils.getConfigInteger("download.retries", 1); this.TIMEOUT = Utils.getConfigInteger("download.timeout", 60000); - this.MAX_BODY_SIZE = Utils.getConfigInteger("download.max_bytes", 1024 * 1024 * 100); } public void setReferrer(String referrer) { @@ -77,44 +78,67 @@ public class DownloadFileThread extends Thread { int tries = 0; // Number of attempts to download do { + tries += 1; + InputStream bis = null; OutputStream fos = null; try { logger.info(" Downloading file: " + url + (tries > 0 ? " Retry #" + tries : "")); observer.sendUpdate(STATUS.DOWNLOAD_STARTED, url.toExternalForm()); - tries += 1; - Response response; - response = Jsoup.connect(url.toExternalForm()) - .ignoreContentType(true) - .userAgent(AbstractRipper.USER_AGENT) - .header("accept", "*/*") - .timeout(TIMEOUT) - .maxBodySize(MAX_BODY_SIZE) - .cookies(cookies) - .referrer(referrer) - .execute(); - if (response.statusCode() != 200) { - logger.error("[!] Non-OK status code " + response.statusCode() + " while downloading from " + url); - observer.downloadErrored(url, "Non-OK status code " + response.statusCode() + " while downloading " + url.toExternalForm()); - return; + + // Setup HTTP request + HttpURLConnection huc = (HttpURLConnection) this.url.openConnection(); + huc.setConnectTimeout(TIMEOUT); + huc.setRequestProperty("accept", "*/*"); + huc.setRequestProperty("Referer", referrer); // Sic + huc.setRequestProperty("User-agent", AbstractRipper.USER_AGENT); + String cookie = ""; + for (String key : cookies.keySet()) { + if (!cookie.equals("")) { + cookie += "; "; + } + cookie += key + "=" + cookies.get(key); } - byte[] bytes = response.bodyAsBytes(); - if (bytes.length == 503 && url.getHost().endsWith("imgur.com")) { + huc.setRequestProperty("Cookie", cookie); + huc.connect(); + + int statusCode = huc.getResponseCode(); + if (statusCode / 100 == 4) { // 4xx errors + logger.error("[!] Non-retriable status code " + statusCode + " while downloading from " + url); + observer.downloadErrored(url, "Non-retriable status code " + statusCode + " while downloading " + url.toExternalForm()); + return; // Not retriable, drop out. + } + if (statusCode / 100 == 5) { // 5xx errors + observer.downloadErrored(url, "Retriable status code " + statusCode + " while downloading " + url.toExternalForm()); + // Throw exception so download can be retried + throw new IOException("Retriable status code " + statusCode); + } + if (huc.getContentLength() == 503 && url.getHost().endsWith("imgur.com")) { // Imgur image with 503 bytes is "404" logger.error("[!] Imgur image is 404 (503 bytes long): " + url); observer.downloadErrored(url, "Imgur image is 404: " + url.toExternalForm()); return; } - FileOutputStream out = new FileOutputStream(saveAs); - out.write(response.bodyAsBytes()); - out.close(); + + // Save file + bis = new BufferedInputStream(huc.getInputStream()); + fos = new FileOutputStream(saveAs); + IOUtils.copy(bis, fos); break; // Download successful: break out of infinite loop } catch (HttpStatusException hse) { logger.error("[!] HTTP status " + hse.getStatusCode() + " while downloading from " + url); - observer.downloadErrored(url, "HTTP status code " + hse.getStatusCode() + " while downloading " + url.toExternalForm()); if (hse.getStatusCode() == 404 && Utils.getConfigBoolean("errors.skip404", false)) { + observer.downloadErrored(url, "HTTP status code " + hse.getStatusCode() + " while downloading " + url.toExternalForm()); return; } } catch (IOException e) { logger.error("[!] Exception while downloading file: " + url + " - " + e.getMessage(), e); + } finally { + // Close any open streams + try { + if (bis != null) { bis.close(); } + } catch (IOException e) { } + try { + if (fos != null) { fos.close(); } + } catch (IOException e) { } } if (tries > this.retries) { logger.error("[!] Exceeded maximum retries (" + this.retries + ") for URL " + url);