c5ea044f79
Ability to set log level, lots of debugging messages Turn on debug logging during tests, simplified test cases for HTML ripper Fix fusktator ripper, added test Fixed gifyo, added test Added tests for *all* rippers Adding a few album-guessing URLs
182 lines
7.4 KiB
Java
182 lines
7.4 KiB
Java
package com.rarchives.ripme.ripper;
|
|
|
|
import java.io.BufferedInputStream;
|
|
import java.io.File;
|
|
import java.io.FileOutputStream;
|
|
import java.io.IOException;
|
|
import java.io.InputStream;
|
|
import java.io.OutputStream;
|
|
import java.net.HttpURLConnection;
|
|
import java.net.URL;
|
|
import java.util.HashMap;
|
|
import java.util.Map;
|
|
|
|
import javax.net.ssl.HttpsURLConnection;
|
|
|
|
import org.apache.commons.io.IOUtils;
|
|
import org.apache.log4j.Logger;
|
|
import org.jsoup.HttpStatusException;
|
|
|
|
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
|
import com.rarchives.ripme.utils.Utils;
|
|
|
|
/**
|
|
* Thread for downloading files.
|
|
* Includes retry logic, observer notifications, and other goodies.
|
|
*/
|
|
public class DownloadFileThread extends Thread {
|
|
|
|
private static final Logger logger = Logger.getLogger(DownloadFileThread.class);
|
|
|
|
private String referrer = "";
|
|
private Map<String,String> cookies = new HashMap<String,String>();
|
|
|
|
private URL url;
|
|
private File saveAs;
|
|
private String prettySaveAs;
|
|
private AbstractRipper observer;
|
|
private int retries;
|
|
|
|
private final int TIMEOUT;
|
|
|
|
public DownloadFileThread(URL url, File saveAs, AbstractRipper observer) {
|
|
super();
|
|
this.url = url;
|
|
this.saveAs = saveAs;
|
|
this.prettySaveAs = Utils.removeCWD(saveAs);
|
|
this.observer = observer;
|
|
this.retries = Utils.getConfigInteger("download.retries", 1);
|
|
this.TIMEOUT = Utils.getConfigInteger("download.timeout", 60000);
|
|
}
|
|
|
|
public void setReferrer(String referrer) {
|
|
this.referrer = referrer;
|
|
}
|
|
public void setCookies(Map<String,String> cookies) {
|
|
this.cookies = cookies;
|
|
}
|
|
|
|
/**
|
|
* Attempts to download the file. Retries as needed.
|
|
* Notifies observers upon completion/error/warn.
|
|
*/
|
|
public void run() {
|
|
try {
|
|
observer.stopCheck();
|
|
} catch (IOException e) {
|
|
observer.downloadErrored(url, "Download interrupted");
|
|
return;
|
|
}
|
|
if (saveAs.exists()) {
|
|
if (Utils.getConfigBoolean("file.overwrite", false)) {
|
|
logger.info("[!] Deleting existing file" + prettySaveAs);
|
|
saveAs.delete();
|
|
} else {
|
|
logger.info("[!] Skipping " + url + " -- file already exists: " + prettySaveAs);
|
|
observer.downloadExists(url, saveAs);
|
|
return;
|
|
}
|
|
}
|
|
|
|
URL urlToDownload = this.url;
|
|
boolean redirected = false;
|
|
int tries = 0; // Number of attempts to download
|
|
do {
|
|
tries += 1;
|
|
InputStream bis = null; OutputStream fos = null;
|
|
try {
|
|
logger.info(" Downloading file: " + urlToDownload + (tries > 0 ? " Retry #" + tries : ""));
|
|
observer.sendUpdate(STATUS.DOWNLOAD_STARTED, url.toExternalForm());
|
|
|
|
// Setup HTTP request
|
|
HttpURLConnection huc;
|
|
if (this.url.toString().startsWith("https")) {
|
|
huc = (HttpsURLConnection) urlToDownload.openConnection();
|
|
}
|
|
else {
|
|
huc = (HttpURLConnection) urlToDownload.openConnection();
|
|
}
|
|
huc.setInstanceFollowRedirects(true);
|
|
huc.setConnectTimeout(TIMEOUT);
|
|
huc.setRequestProperty("accept", "*/*");
|
|
if (!referrer.equals("")) {
|
|
huc.setRequestProperty("Referer", referrer); // Sic
|
|
}
|
|
huc.setRequestProperty("User-agent", AbstractRipper.USER_AGENT);
|
|
String cookie = "";
|
|
for (String key : cookies.keySet()) {
|
|
if (!cookie.equals("")) {
|
|
cookie += "; ";
|
|
}
|
|
cookie += key + "=" + cookies.get(key);
|
|
}
|
|
huc.setRequestProperty("Cookie", cookie);
|
|
logger.debug("Request properties: " + huc.getRequestProperties());
|
|
huc.connect();
|
|
|
|
int statusCode = huc.getResponseCode();
|
|
logger.debug("Status code: " + statusCode);
|
|
if (statusCode / 100 == 3) { // 3xx Redirect
|
|
if (!redirected) {
|
|
// Don't increment retries on the first redirect
|
|
tries--;
|
|
redirected = true;
|
|
}
|
|
String location = huc.getHeaderField("Location");
|
|
urlToDownload = new URL(location);
|
|
// Throw exception so download can be retried
|
|
throw new IOException("Redirect status code " + statusCode + " - redirect to " + location);
|
|
}
|
|
if (statusCode / 100 == 4) { // 4xx errors
|
|
logger.error("[!] Non-retriable status code " + statusCode + " while downloading from " + url);
|
|
observer.downloadErrored(url, "Non-retriable status code " + statusCode + " while downloading " + url.toExternalForm());
|
|
return; // Not retriable, drop out.
|
|
}
|
|
if (statusCode / 100 == 5) { // 5xx errors
|
|
observer.downloadErrored(url, "Retriable status code " + statusCode + " while downloading " + url.toExternalForm());
|
|
// Throw exception so download can be retried
|
|
throw new IOException("Retriable status code " + statusCode);
|
|
}
|
|
if (huc.getContentLength() == 503 && urlToDownload.getHost().endsWith("imgur.com")) {
|
|
// Imgur image with 503 bytes is "404"
|
|
logger.error("[!] Imgur image is 404 (503 bytes long): " + url);
|
|
observer.downloadErrored(url, "Imgur image is 404: " + url.toExternalForm());
|
|
return;
|
|
}
|
|
|
|
// Save file
|
|
bis = new BufferedInputStream(huc.getInputStream());
|
|
fos = new FileOutputStream(saveAs);
|
|
IOUtils.copy(bis, fos);
|
|
break; // Download successful: break out of infinite loop
|
|
} catch (HttpStatusException hse) {
|
|
logger.debug("HTTP status exception", hse);
|
|
logger.error("[!] HTTP status " + hse.getStatusCode() + " while downloading from " + urlToDownload);
|
|
if (hse.getStatusCode() == 404 && Utils.getConfigBoolean("errors.skip404", false)) {
|
|
observer.downloadErrored(url, "HTTP status code " + hse.getStatusCode() + " while downloading " + url.toExternalForm());
|
|
return;
|
|
}
|
|
} catch (IOException e) {
|
|
logger.debug("IOException", e);
|
|
logger.error("[!] Exception while downloading file: " + url + " - " + e.getMessage());
|
|
} finally {
|
|
// Close any open streams
|
|
try {
|
|
if (bis != null) { bis.close(); }
|
|
} catch (IOException e) { }
|
|
try {
|
|
if (fos != null) { fos.close(); }
|
|
} catch (IOException e) { }
|
|
}
|
|
if (tries > this.retries) {
|
|
logger.error("[!] Exceeded maximum retries (" + this.retries + ") for URL " + url);
|
|
observer.downloadErrored(url, "Failed to download " + url.toExternalForm());
|
|
return;
|
|
}
|
|
} while (true);
|
|
observer.downloadCompleted(url, saveAs);
|
|
logger.info("[+] Saved " + url + " as " + this.prettySaveAs);
|
|
}
|
|
|
|
}
|