Download via stream instead of Jsoup for #80
This commit is contained in:
parent
f8f3067099
commit
104cbead5a
@ -1,15 +1,18 @@
|
|||||||
package com.rarchives.ripme.ripper;
|
package com.rarchives.ripme.ripper;
|
||||||
|
|
||||||
|
import java.io.BufferedInputStream;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.net.HttpURLConnection;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.jsoup.Connection.Response;
|
|
||||||
import org.jsoup.Jsoup;
|
|
||||||
import org.jsoup.HttpStatusException;
|
import org.jsoup.HttpStatusException;
|
||||||
|
|
||||||
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
||||||
@ -33,7 +36,6 @@ public class DownloadFileThread extends Thread {
|
|||||||
private int retries;
|
private int retries;
|
||||||
|
|
||||||
private final int TIMEOUT;
|
private final int TIMEOUT;
|
||||||
private final int MAX_BODY_SIZE;
|
|
||||||
|
|
||||||
public DownloadFileThread(URL url, File saveAs, AbstractRipper observer) {
|
public DownloadFileThread(URL url, File saveAs, AbstractRipper observer) {
|
||||||
super();
|
super();
|
||||||
@ -43,7 +45,6 @@ public class DownloadFileThread extends Thread {
|
|||||||
this.observer = observer;
|
this.observer = observer;
|
||||||
this.retries = Utils.getConfigInteger("download.retries", 1);
|
this.retries = Utils.getConfigInteger("download.retries", 1);
|
||||||
this.TIMEOUT = Utils.getConfigInteger("download.timeout", 60000);
|
this.TIMEOUT = Utils.getConfigInteger("download.timeout", 60000);
|
||||||
this.MAX_BODY_SIZE = Utils.getConfigInteger("download.max_bytes", 1024 * 1024 * 100);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setReferrer(String referrer) {
|
public void setReferrer(String referrer) {
|
||||||
@ -77,44 +78,67 @@ public class DownloadFileThread extends Thread {
|
|||||||
|
|
||||||
int tries = 0; // Number of attempts to download
|
int tries = 0; // Number of attempts to download
|
||||||
do {
|
do {
|
||||||
|
tries += 1;
|
||||||
|
InputStream bis = null; OutputStream fos = null;
|
||||||
try {
|
try {
|
||||||
logger.info(" Downloading file: " + url + (tries > 0 ? " Retry #" + tries : ""));
|
logger.info(" Downloading file: " + url + (tries > 0 ? " Retry #" + tries : ""));
|
||||||
observer.sendUpdate(STATUS.DOWNLOAD_STARTED, url.toExternalForm());
|
observer.sendUpdate(STATUS.DOWNLOAD_STARTED, url.toExternalForm());
|
||||||
tries += 1;
|
|
||||||
Response response;
|
// Setup HTTP request
|
||||||
response = Jsoup.connect(url.toExternalForm())
|
HttpURLConnection huc = (HttpURLConnection) this.url.openConnection();
|
||||||
.ignoreContentType(true)
|
huc.setConnectTimeout(TIMEOUT);
|
||||||
.userAgent(AbstractRipper.USER_AGENT)
|
huc.setRequestProperty("accept", "*/*");
|
||||||
.header("accept", "*/*")
|
huc.setRequestProperty("Referer", referrer); // Sic
|
||||||
.timeout(TIMEOUT)
|
huc.setRequestProperty("User-agent", AbstractRipper.USER_AGENT);
|
||||||
.maxBodySize(MAX_BODY_SIZE)
|
String cookie = "";
|
||||||
.cookies(cookies)
|
for (String key : cookies.keySet()) {
|
||||||
.referrer(referrer)
|
if (!cookie.equals("")) {
|
||||||
.execute();
|
cookie += "; ";
|
||||||
if (response.statusCode() != 200) {
|
}
|
||||||
logger.error("[!] Non-OK status code " + response.statusCode() + " while downloading from " + url);
|
cookie += key + "=" + cookies.get(key);
|
||||||
observer.downloadErrored(url, "Non-OK status code " + response.statusCode() + " while downloading " + url.toExternalForm());
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
byte[] bytes = response.bodyAsBytes();
|
huc.setRequestProperty("Cookie", cookie);
|
||||||
if (bytes.length == 503 && url.getHost().endsWith("imgur.com")) {
|
huc.connect();
|
||||||
|
|
||||||
|
int statusCode = huc.getResponseCode();
|
||||||
|
if (statusCode / 100 == 4) { // 4xx errors
|
||||||
|
logger.error("[!] Non-retriable status code " + statusCode + " while downloading from " + url);
|
||||||
|
observer.downloadErrored(url, "Non-retriable status code " + statusCode + " while downloading " + url.toExternalForm());
|
||||||
|
return; // Not retriable, drop out.
|
||||||
|
}
|
||||||
|
if (statusCode / 100 == 5) { // 5xx errors
|
||||||
|
observer.downloadErrored(url, "Retriable status code " + statusCode + " while downloading " + url.toExternalForm());
|
||||||
|
// Throw exception so download can be retried
|
||||||
|
throw new IOException("Retriable status code " + statusCode);
|
||||||
|
}
|
||||||
|
if (huc.getContentLength() == 503 && url.getHost().endsWith("imgur.com")) {
|
||||||
// Imgur image with 503 bytes is "404"
|
// Imgur image with 503 bytes is "404"
|
||||||
logger.error("[!] Imgur image is 404 (503 bytes long): " + url);
|
logger.error("[!] Imgur image is 404 (503 bytes long): " + url);
|
||||||
observer.downloadErrored(url, "Imgur image is 404: " + url.toExternalForm());
|
observer.downloadErrored(url, "Imgur image is 404: " + url.toExternalForm());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
FileOutputStream out = new FileOutputStream(saveAs);
|
|
||||||
out.write(response.bodyAsBytes());
|
// Save file
|
||||||
out.close();
|
bis = new BufferedInputStream(huc.getInputStream());
|
||||||
|
fos = new FileOutputStream(saveAs);
|
||||||
|
IOUtils.copy(bis, fos);
|
||||||
break; // Download successful: break out of infinite loop
|
break; // Download successful: break out of infinite loop
|
||||||
} catch (HttpStatusException hse) {
|
} catch (HttpStatusException hse) {
|
||||||
logger.error("[!] HTTP status " + hse.getStatusCode() + " while downloading from " + url);
|
logger.error("[!] HTTP status " + hse.getStatusCode() + " while downloading from " + url);
|
||||||
observer.downloadErrored(url, "HTTP status code " + hse.getStatusCode() + " while downloading " + url.toExternalForm());
|
|
||||||
if (hse.getStatusCode() == 404 && Utils.getConfigBoolean("errors.skip404", false)) {
|
if (hse.getStatusCode() == 404 && Utils.getConfigBoolean("errors.skip404", false)) {
|
||||||
|
observer.downloadErrored(url, "HTTP status code " + hse.getStatusCode() + " while downloading " + url.toExternalForm());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
logger.error("[!] Exception while downloading file: " + url + " - " + e.getMessage(), e);
|
logger.error("[!] Exception while downloading file: " + url + " - " + e.getMessage(), e);
|
||||||
|
} finally {
|
||||||
|
// Close any open streams
|
||||||
|
try {
|
||||||
|
if (bis != null) { bis.close(); }
|
||||||
|
} catch (IOException e) { }
|
||||||
|
try {
|
||||||
|
if (fos != null) { fos.close(); }
|
||||||
|
} catch (IOException e) { }
|
||||||
}
|
}
|
||||||
if (tries > this.retries) {
|
if (tries > this.retries) {
|
||||||
logger.error("[!] Exceeded maximum retries (" + this.retries + ") for URL " + url);
|
logger.error("[!] Exceeded maximum retries (" + this.retries + ") for URL " + url);
|
||||||
|
Loading…
Reference in New Issue
Block a user