2014-02-27 04:54:44 +01:00
|
|
|
package com.rarchives.ripme.ripper;
|
|
|
|
|
2014-06-25 10:14:05 +02:00
|
|
|
import java.io.BufferedInputStream;
|
2014-02-27 04:54:44 +01:00
|
|
|
import java.io.File;
|
|
|
|
import java.io.FileOutputStream;
|
|
|
|
import java.io.IOException;
|
2014-06-25 10:14:05 +02:00
|
|
|
import java.io.InputStream;
|
|
|
|
import java.io.OutputStream;
|
|
|
|
import java.net.HttpURLConnection;
|
2014-02-27 04:54:44 +01:00
|
|
|
import java.net.URL;
|
2018-05-05 11:36:00 +02:00
|
|
|
import java.net.URLConnection;
|
2014-04-23 05:48:41 +02:00
|
|
|
import java.util.HashMap;
|
|
|
|
import java.util.Map;
|
2014-02-27 04:54:44 +01:00
|
|
|
|
2015-01-11 01:03:30 +01:00
|
|
|
import javax.net.ssl.HttpsURLConnection;
|
|
|
|
|
2014-06-25 10:14:05 +02:00
|
|
|
import org.apache.commons.io.IOUtils;
|
2014-02-27 04:54:44 +01:00
|
|
|
import org.apache.log4j.Logger;
|
2014-06-03 17:52:36 +02:00
|
|
|
import org.jsoup.HttpStatusException;
|
2014-02-27 04:54:44 +01:00
|
|
|
|
2014-03-01 11:13:32 +01:00
|
|
|
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
2014-02-27 04:54:44 +01:00
|
|
|
import com.rarchives.ripme.utils.Utils;
|
|
|
|
|
2014-03-09 09:20:22 +01:00
|
|
|
/**
|
|
|
|
* Thread for downloading files.
|
|
|
|
* Includes retry logic, observer notifications, and other goodies.
|
|
|
|
*/
|
2017-10-24 16:33:28 +02:00
|
|
|
class DownloadFileThread extends Thread {
|
2014-02-27 04:54:44 +01:00
|
|
|
|
|
|
|
private static final Logger logger = Logger.getLogger(DownloadFileThread.class);
|
|
|
|
|
2014-04-23 05:48:41 +02:00
|
|
|
private String referrer = "";
|
2017-10-24 16:33:28 +02:00
|
|
|
private Map<String,String> cookies = new HashMap<>();
|
2014-04-23 05:48:41 +02:00
|
|
|
|
2014-02-27 04:54:44 +01:00
|
|
|
private URL url;
|
|
|
|
private File saveAs;
|
2014-02-28 12:04:03 +01:00
|
|
|
private String prettySaveAs;
|
2014-03-01 11:13:32 +01:00
|
|
|
private AbstractRipper observer;
|
2014-02-28 05:25:16 +01:00
|
|
|
private int retries;
|
2018-05-05 11:36:00 +02:00
|
|
|
private Boolean getFileExtFromMIME;
|
2014-02-27 04:54:44 +01:00
|
|
|
|
2014-03-13 03:29:11 +01:00
|
|
|
private final int TIMEOUT;
|
|
|
|
|
2018-05-05 11:36:00 +02:00
|
|
|
public DownloadFileThread(URL url, File saveAs, AbstractRipper observer, Boolean getFileExtFromMIME) {
|
2014-02-27 04:54:44 +01:00
|
|
|
super();
|
|
|
|
this.url = url;
|
|
|
|
this.saveAs = saveAs;
|
2014-02-28 12:04:03 +01:00
|
|
|
this.prettySaveAs = Utils.removeCWD(saveAs);
|
2014-03-01 11:13:32 +01:00
|
|
|
this.observer = observer;
|
2014-02-28 05:25:16 +01:00
|
|
|
this.retries = Utils.getConfigInteger("download.retries", 1);
|
2014-03-13 03:29:11 +01:00
|
|
|
this.TIMEOUT = Utils.getConfigInteger("download.timeout", 60000);
|
2018-05-05 11:36:00 +02:00
|
|
|
this.getFileExtFromMIME = getFileExtFromMIME;
|
2014-02-27 04:54:44 +01:00
|
|
|
}
|
|
|
|
|
2014-04-23 05:48:41 +02:00
|
|
|
public void setReferrer(String referrer) {
|
|
|
|
this.referrer = referrer;
|
|
|
|
}
|
|
|
|
public void setCookies(Map<String,String> cookies) {
|
|
|
|
this.cookies = cookies;
|
|
|
|
}
|
|
|
|
|
2018-06-26 06:54:18 +02:00
|
|
|
private int getTotalBytes(URL url) throws IOException {
|
|
|
|
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
|
|
|
|
conn.setRequestMethod("HEAD");
|
|
|
|
conn.setRequestProperty("accept", "*/*");
|
|
|
|
conn.setRequestProperty("Referer", this.url.toExternalForm()); // Sic
|
|
|
|
conn.setRequestProperty("User-agent", AbstractRipper.USER_AGENT);
|
|
|
|
return conn.getContentLength();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2014-03-09 09:20:22 +01:00
|
|
|
/**
|
|
|
|
* Attempts to download the file. Retries as needed.
|
|
|
|
* Notifies observers upon completion/error/warn.
|
|
|
|
*/
|
2014-02-27 04:54:44 +01:00
|
|
|
public void run() {
|
2014-04-18 07:11:37 +02:00
|
|
|
try {
|
|
|
|
observer.stopCheck();
|
|
|
|
} catch (IOException e) {
|
|
|
|
observer.downloadErrored(url, "Download interrupted");
|
|
|
|
return;
|
|
|
|
}
|
2014-02-27 04:54:44 +01:00
|
|
|
if (saveAs.exists()) {
|
|
|
|
if (Utils.getConfigBoolean("file.overwrite", false)) {
|
2014-03-01 11:13:32 +01:00
|
|
|
logger.info("[!] Deleting existing file" + prettySaveAs);
|
2014-02-27 04:54:44 +01:00
|
|
|
saveAs.delete();
|
|
|
|
} else {
|
2014-02-28 12:04:03 +01:00
|
|
|
logger.info("[!] Skipping " + url + " -- file already exists: " + prettySaveAs);
|
2015-01-11 14:11:10 +01:00
|
|
|
observer.downloadExists(url, saveAs);
|
2014-02-27 04:54:44 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-06-26 06:54:18 +02:00
|
|
|
int bytesTotal, bytesDownloaded = 0;
|
|
|
|
if (observer.useByteProgessBar()) {
|
|
|
|
try {
|
|
|
|
bytesTotal = getTotalBytes(this.url);
|
|
|
|
} catch (IOException e) {
|
|
|
|
logger.error("Failed to get file size at " + this.url, e);
|
|
|
|
observer.downloadErrored(this.url, "Failed to get file size of " + this.url);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
observer.setBytesTotal(bytesTotal);
|
|
|
|
observer.sendUpdate(STATUS.TOTAL_BYTES, bytesTotal);
|
|
|
|
logger.debug("Size of file at " + this.url + " = " + bytesTotal + "b");
|
|
|
|
}
|
|
|
|
|
2015-01-11 01:03:30 +01:00
|
|
|
URL urlToDownload = this.url;
|
|
|
|
boolean redirected = false;
|
2014-02-28 05:25:16 +01:00
|
|
|
int tries = 0; // Number of attempts to download
|
|
|
|
do {
|
2014-06-25 10:14:05 +02:00
|
|
|
tries += 1;
|
|
|
|
InputStream bis = null; OutputStream fos = null;
|
2014-02-28 05:25:16 +01:00
|
|
|
try {
|
2015-01-11 01:03:30 +01:00
|
|
|
logger.info(" Downloading file: " + urlToDownload + (tries > 0 ? " Retry #" + tries : ""));
|
2014-03-01 11:13:32 +01:00
|
|
|
observer.sendUpdate(STATUS.DOWNLOAD_STARTED, url.toExternalForm());
|
2014-06-25 10:14:05 +02:00
|
|
|
|
|
|
|
// Setup HTTP request
|
2015-01-11 01:03:30 +01:00
|
|
|
HttpURLConnection huc;
|
|
|
|
if (this.url.toString().startsWith("https")) {
|
|
|
|
huc = (HttpsURLConnection) urlToDownload.openConnection();
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
huc = (HttpURLConnection) urlToDownload.openConnection();
|
|
|
|
}
|
|
|
|
huc.setInstanceFollowRedirects(true);
|
2014-06-25 10:14:05 +02:00
|
|
|
huc.setConnectTimeout(TIMEOUT);
|
|
|
|
huc.setRequestProperty("accept", "*/*");
|
2015-01-13 09:39:52 +01:00
|
|
|
if (!referrer.equals("")) {
|
|
|
|
huc.setRequestProperty("Referer", referrer); // Sic
|
|
|
|
}
|
2014-06-25 10:14:05 +02:00
|
|
|
huc.setRequestProperty("User-agent", AbstractRipper.USER_AGENT);
|
|
|
|
String cookie = "";
|
|
|
|
for (String key : cookies.keySet()) {
|
|
|
|
if (!cookie.equals("")) {
|
|
|
|
cookie += "; ";
|
|
|
|
}
|
|
|
|
cookie += key + "=" + cookies.get(key);
|
|
|
|
}
|
|
|
|
huc.setRequestProperty("Cookie", cookie);
|
2015-02-10 08:29:29 +01:00
|
|
|
logger.debug("Request properties: " + huc.getRequestProperties());
|
2014-06-25 10:14:05 +02:00
|
|
|
huc.connect();
|
|
|
|
|
|
|
|
int statusCode = huc.getResponseCode();
|
2015-02-10 08:29:29 +01:00
|
|
|
logger.debug("Status code: " + statusCode);
|
2015-01-11 01:03:30 +01:00
|
|
|
if (statusCode / 100 == 3) { // 3xx Redirect
|
|
|
|
if (!redirected) {
|
|
|
|
// Don't increment retries on the first redirect
|
|
|
|
tries--;
|
|
|
|
redirected = true;
|
|
|
|
}
|
|
|
|
String location = huc.getHeaderField("Location");
|
|
|
|
urlToDownload = new URL(location);
|
|
|
|
// Throw exception so download can be retried
|
|
|
|
throw new IOException("Redirect status code " + statusCode + " - redirect to " + location);
|
|
|
|
}
|
2014-06-25 10:14:05 +02:00
|
|
|
if (statusCode / 100 == 4) { // 4xx errors
|
|
|
|
logger.error("[!] Non-retriable status code " + statusCode + " while downloading from " + url);
|
|
|
|
observer.downloadErrored(url, "Non-retriable status code " + statusCode + " while downloading " + url.toExternalForm());
|
|
|
|
return; // Not retriable, drop out.
|
|
|
|
}
|
|
|
|
if (statusCode / 100 == 5) { // 5xx errors
|
|
|
|
observer.downloadErrored(url, "Retriable status code " + statusCode + " while downloading " + url.toExternalForm());
|
|
|
|
// Throw exception so download can be retried
|
|
|
|
throw new IOException("Retriable status code " + statusCode);
|
2014-04-08 07:37:21 +02:00
|
|
|
}
|
2015-01-11 01:03:30 +01:00
|
|
|
if (huc.getContentLength() == 503 && urlToDownload.getHost().endsWith("imgur.com")) {
|
2014-04-08 07:37:21 +02:00
|
|
|
// Imgur image with 503 bytes is "404"
|
|
|
|
logger.error("[!] Imgur image is 404 (503 bytes long): " + url);
|
|
|
|
observer.downloadErrored(url, "Imgur image is 404: " + url.toExternalForm());
|
|
|
|
return;
|
|
|
|
}
|
2014-06-25 10:14:05 +02:00
|
|
|
// Save file
|
|
|
|
bis = new BufferedInputStream(huc.getInputStream());
|
2018-05-05 11:36:00 +02:00
|
|
|
|
|
|
|
// Check if we should get the file ext from the MIME type
|
|
|
|
if (getFileExtFromMIME) {
|
|
|
|
String fileExt = URLConnection.guessContentTypeFromStream(bis).replaceAll("image/", "");
|
|
|
|
saveAs = new File(saveAs.toString() + "." + fileExt);
|
|
|
|
}
|
|
|
|
|
2014-06-25 10:14:05 +02:00
|
|
|
fos = new FileOutputStream(saveAs);
|
2018-06-26 06:54:18 +02:00
|
|
|
byte[] data = new byte[1024 * 256]; int bytesRead;
|
|
|
|
while ( (bytesRead = bis.read(data)) != -1) {
|
|
|
|
try {
|
|
|
|
observer.stopCheck();
|
|
|
|
} catch (IOException e) {
|
|
|
|
observer.downloadErrored(url, "Download interrupted");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
fos.write(data, 0, bytesRead);
|
|
|
|
if (observer.useByteProgessBar()) {
|
|
|
|
bytesDownloaded += bytesRead;
|
|
|
|
observer.setBytesCompleted(bytesDownloaded);
|
|
|
|
observer.sendUpdate(STATUS.COMPLETED_BYTES, bytesDownloaded);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
bis.close();
|
|
|
|
fos.close();
|
2014-02-28 05:25:16 +01:00
|
|
|
break; // Download successful: break out of infinite loop
|
2014-06-05 05:31:11 +02:00
|
|
|
} catch (HttpStatusException hse) {
|
2015-02-10 08:29:29 +01:00
|
|
|
logger.debug("HTTP status exception", hse);
|
2015-01-11 01:03:30 +01:00
|
|
|
logger.error("[!] HTTP status " + hse.getStatusCode() + " while downloading from " + urlToDownload);
|
2014-06-05 05:31:11 +02:00
|
|
|
if (hse.getStatusCode() == 404 && Utils.getConfigBoolean("errors.skip404", false)) {
|
2014-06-25 10:14:05 +02:00
|
|
|
observer.downloadErrored(url, "HTTP status code " + hse.getStatusCode() + " while downloading " + url.toExternalForm());
|
2014-06-05 05:31:11 +02:00
|
|
|
return;
|
|
|
|
}
|
2014-02-28 05:25:16 +01:00
|
|
|
} catch (IOException e) {
|
2015-02-10 08:29:29 +01:00
|
|
|
logger.debug("IOException", e);
|
2015-01-11 01:03:30 +01:00
|
|
|
logger.error("[!] Exception while downloading file: " + url + " - " + e.getMessage());
|
2014-06-25 10:14:05 +02:00
|
|
|
} finally {
|
|
|
|
// Close any open streams
|
|
|
|
try {
|
|
|
|
if (bis != null) { bis.close(); }
|
|
|
|
} catch (IOException e) { }
|
|
|
|
try {
|
|
|
|
if (fos != null) { fos.close(); }
|
|
|
|
} catch (IOException e) { }
|
2014-02-28 05:25:16 +01:00
|
|
|
}
|
|
|
|
if (tries > this.retries) {
|
|
|
|
logger.error("[!] Exceeded maximum retries (" + this.retries + ") for URL " + url);
|
2014-03-01 11:13:32 +01:00
|
|
|
observer.downloadErrored(url, "Failed to download " + url.toExternalForm());
|
2014-02-28 05:25:16 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
} while (true);
|
2014-03-01 11:13:32 +01:00
|
|
|
observer.downloadCompleted(url, saveAs);
|
2014-02-28 12:04:03 +01:00
|
|
|
logger.info("[+] Saved " + url + " as " + this.prettySaveAs);
|
2014-02-27 04:54:44 +01:00
|
|
|
}
|
|
|
|
|
2014-06-03 17:52:36 +02:00
|
|
|
}
|