2014-02-25 10:28:22 +01:00
|
|
|
package com.rarchives.ripme.ripper;
|
|
|
|
|
2014-02-26 08:44:22 +01:00
|
|
|
import java.io.File;
|
|
|
|
import java.io.IOException;
|
2014-03-02 04:37:09 +01:00
|
|
|
import java.lang.reflect.Constructor;
|
2014-02-25 10:28:22 +01:00
|
|
|
import java.net.MalformedURLException;
|
|
|
|
import java.net.URL;
|
2014-03-02 04:37:09 +01:00
|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.Enumeration;
|
2014-03-01 11:13:32 +01:00
|
|
|
import java.util.HashMap;
|
2014-03-02 04:37:09 +01:00
|
|
|
import java.util.List;
|
2014-03-01 11:13:32 +01:00
|
|
|
import java.util.Map;
|
|
|
|
import java.util.Observable;
|
|
|
|
import java.util.Observer;
|
2014-02-25 10:28:22 +01:00
|
|
|
|
2014-02-27 04:54:44 +01:00
|
|
|
import org.apache.log4j.Logger;
|
|
|
|
|
2014-03-01 11:13:32 +01:00
|
|
|
import com.rarchives.ripme.ui.RipStatusMessage;
|
|
|
|
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
2014-02-26 08:44:22 +01:00
|
|
|
import com.rarchives.ripme.utils.Utils;
|
|
|
|
|
2014-03-01 11:13:32 +01:00
|
|
|
public abstract class AbstractRipper
|
|
|
|
extends Observable
|
2014-03-01 11:43:47 +01:00
|
|
|
implements RipperInterface, Runnable {
|
2014-02-25 10:28:22 +01:00
|
|
|
|
2014-02-27 04:54:44 +01:00
|
|
|
private static final Logger logger = Logger.getLogger(AbstractRipper.class);
|
|
|
|
|
2014-02-25 10:28:22 +01:00
|
|
|
protected URL url;
|
2014-02-27 04:54:44 +01:00
|
|
|
protected File workingDir;
|
|
|
|
protected DownloadThreadPool threadPool;
|
2014-03-01 11:13:32 +01:00
|
|
|
protected Observer observer = null;
|
|
|
|
|
|
|
|
protected int itemsTotal;
|
|
|
|
protected Map<URL, File> itemsPending = new HashMap<URL, File>();
|
|
|
|
protected Map<URL, File> itemsCompleted = new HashMap<URL, File>();
|
|
|
|
protected Map<URL, String> itemsErrored = new HashMap<URL, String>();
|
2014-03-02 03:08:16 +01:00
|
|
|
protected boolean completed = true;
|
2014-02-26 08:44:22 +01:00
|
|
|
|
|
|
|
/**
 * Runs this ripper; implemented by each concrete ripper.
 * Invoked by {@link #run()}, which logs any {@link IOException} thrown here.
 *
 * @throws IOException
 *      If the rip fails.
 */
public abstract void rip() throws IOException;
|
2014-02-27 04:54:44 +01:00
|
|
|
/**
 * Returns the host part of this ripper's working directory name, which
 * setWorkingDir() builds as <code>host_gid</code>.
 * NOTE(review): presumably a short identifier of the site this ripper handles --
 * confirm against the concrete rippers.
 *
 * @return
 *      Host identifier for this ripper.
 */
public abstract String getHost();
|
|
|
|
/**
 * Returns the identifier that setWorkingDir() appends to the host name
 * (as <code>host_gid</code>) to name the working directory.
 *
 * @param url
 *      URL to derive the identifier from.
 * @return
 *      Identifier for the given URL.
 * @throws MalformedURLException
 *      Presumably when the URL is not in a form this ripper understands --
 *      confirm against the concrete rippers.
 */
public abstract String getGID(URL url) throws MalformedURLException;
|
2014-02-25 10:28:22 +01:00
|
|
|
|
2014-02-26 08:44:22 +01:00
|
|
|
/**
|
2014-02-27 04:54:44 +01:00
|
|
|
* Ensures inheriting ripper can rip this URL, raises exception if not.
|
|
|
|
* Otherwise initializes working directory and thread pool.
|
|
|
|
*
|
2014-02-26 08:44:22 +01:00
|
|
|
* @param url
|
|
|
|
* URL to rip.
|
|
|
|
* @throws IOException
|
|
|
|
* If anything goes wrong.
|
|
|
|
*/
|
|
|
|
public AbstractRipper(URL url) throws IOException {
|
2014-02-25 10:28:22 +01:00
|
|
|
if (!canRip(url)) {
|
|
|
|
throw new MalformedURLException("Unable to rip url: " + url);
|
|
|
|
}
|
2014-02-27 04:54:44 +01:00
|
|
|
this.url = sanitizeURL(url);
|
|
|
|
setWorkingDir(url);
|
|
|
|
this.threadPool = new DownloadThreadPool();
|
|
|
|
}
|
2014-03-01 11:13:32 +01:00
|
|
|
|
|
|
|
public void setObserver(Observer obs) {
|
|
|
|
this.observer = obs;
|
|
|
|
}
|
2014-02-27 04:54:44 +01:00
|
|
|
|
2014-02-28 12:04:03 +01:00
|
|
|
/**
|
|
|
|
* Queues image to be downloaded and saved.
|
|
|
|
* Uses filename from URL to decide filename.
|
|
|
|
* @param url
|
|
|
|
* URL to download
|
|
|
|
*/
|
2014-02-27 04:54:44 +01:00
|
|
|
public void addURLToDownload(URL url) {
|
2014-02-28 12:04:03 +01:00
|
|
|
// Use empty prefix and empty subdirectory
|
|
|
|
addURLToDownload(url, "", "");
|
2014-02-27 04:54:44 +01:00
|
|
|
}
|
|
|
|
|
2014-02-28 12:04:03 +01:00
|
|
|
/**
|
|
|
|
* Queues image to be downloaded and saved.
|
|
|
|
* Uses filename from URL (and 'prefix') to decide filename.
|
|
|
|
* @param url
|
|
|
|
* URL to download
|
|
|
|
* @param prefix
|
|
|
|
* Text to append to saved filename.
|
|
|
|
*/
|
2014-02-27 04:54:44 +01:00
|
|
|
public void addURLToDownload(URL url, String prefix) {
|
2014-02-28 12:04:03 +01:00
|
|
|
// Use empty subdirectory
|
|
|
|
addURLToDownload(url, prefix, "");
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Queues image to be downloaded and saved.
|
|
|
|
* @param url
|
|
|
|
* URL of the file
|
|
|
|
* @param saveAs
|
|
|
|
* Path of the local file to save the content to.
|
|
|
|
*/
|
|
|
|
public void addURLToDownload(URL url, File saveAs) {
|
2014-03-02 03:08:16 +01:00
|
|
|
if (itemsPending.containsKey(url)
|
|
|
|
|| itemsCompleted.containsKey(url)
|
|
|
|
|| itemsErrored.containsKey(url)) {
|
2014-03-01 11:13:32 +01:00
|
|
|
// Item is already downloaded/downloading, skip it.
|
2014-03-02 03:08:16 +01:00
|
|
|
logger.info("Skipping " + url + " -- already attempted: " + Utils.removeCWD(saveAs));
|
2014-03-01 11:13:32 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
itemsPending.put(url, saveAs);
|
|
|
|
threadPool.addThread(new DownloadFileThread(url, saveAs, this));
|
2014-02-28 12:04:03 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
public void addURLToDownload(URL url, String prefix, String subdirectory) {
|
2014-02-27 04:54:44 +01:00
|
|
|
String saveAs = url.toExternalForm();
|
|
|
|
saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1);
|
|
|
|
if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); }
|
|
|
|
if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); }
|
|
|
|
if (saveAs.indexOf('&') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('&')); }
|
|
|
|
File saveFileAs;
|
|
|
|
try {
|
2014-02-28 12:04:03 +01:00
|
|
|
if (!subdirectory.equals("")) {
|
|
|
|
subdirectory = File.separator + subdirectory;
|
|
|
|
}
|
|
|
|
saveFileAs = new File(
|
|
|
|
workingDir.getCanonicalPath()
|
|
|
|
+ subdirectory
|
|
|
|
+ File.separator
|
|
|
|
+ prefix
|
|
|
|
+ saveAs);
|
2014-02-27 04:54:44 +01:00
|
|
|
} catch (IOException e) {
|
2014-02-28 04:49:28 +01:00
|
|
|
logger.error("[!] Error creating save file path for URL '" + url + "':", e);
|
2014-02-27 04:54:44 +01:00
|
|
|
return;
|
|
|
|
}
|
2014-02-27 10:28:23 +01:00
|
|
|
logger.debug("Downloading " + url + " to " + saveFileAs);
|
2014-02-28 12:04:03 +01:00
|
|
|
if (!saveFileAs.getParentFile().exists()) {
|
|
|
|
logger.info("[+] Creating directory: " + Utils.removeCWD(saveFileAs.getParent()));
|
|
|
|
saveFileAs.getParentFile().mkdirs();
|
|
|
|
}
|
2014-02-27 04:54:44 +01:00
|
|
|
addURLToDownload(url, saveFileAs);
|
|
|
|
}
|
2014-03-02 03:08:16 +01:00
|
|
|
|
|
|
|
protected void waitForThreads() {
|
|
|
|
completed = false;
|
|
|
|
threadPool.waitForThreads();
|
|
|
|
}
|
2014-02-25 10:28:22 +01:00
|
|
|
|
2014-03-01 11:13:32 +01:00
|
|
|
public void retrievingSource(URL url) {
|
|
|
|
RipStatusMessage msg = new RipStatusMessage(STATUS.LOADING_RESOURCE, url);
|
|
|
|
observer.update(this, msg);
|
|
|
|
observer.notifyAll();
|
|
|
|
}
|
|
|
|
|
|
|
|
public void downloadCompleted(URL url, File saveAs) {
|
2014-03-02 04:37:09 +01:00
|
|
|
if (observer == null) {
|
|
|
|
return;
|
|
|
|
}
|
2014-03-01 11:13:32 +01:00
|
|
|
try {
|
|
|
|
String path = saveAs.getCanonicalPath();
|
|
|
|
RipStatusMessage msg = new RipStatusMessage(STATUS.DOWNLOAD_COMPLETE, path);
|
|
|
|
synchronized(observer) {
|
|
|
|
itemsPending.remove(url);
|
|
|
|
itemsCompleted.put(url, saveAs);
|
|
|
|
observer.update(this, msg);
|
|
|
|
observer.notifyAll();
|
|
|
|
checkIfComplete();
|
|
|
|
}
|
|
|
|
} catch (Exception e) {
|
|
|
|
logger.error("Exception while updating observer: ", e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
public void downloadErrored(URL url, String reason) {
|
2014-03-02 04:37:09 +01:00
|
|
|
if (observer == null) {
|
|
|
|
return;
|
|
|
|
}
|
2014-03-01 11:13:32 +01:00
|
|
|
synchronized(observer) {
|
|
|
|
itemsPending.remove(url);
|
|
|
|
itemsErrored.put(url, reason);
|
|
|
|
observer.update(this, new RipStatusMessage(STATUS.DOWNLOAD_ERRORED, url + " : " + reason));
|
|
|
|
observer.notifyAll();
|
|
|
|
checkIfComplete();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private void checkIfComplete() {
|
2014-03-02 03:08:16 +01:00
|
|
|
System.err.println("Pending: " + itemsPending.size() + ", Completed: " + itemsCompleted.size() + ", Errored: " + itemsErrored.size());
|
|
|
|
if (!completed && itemsPending.size() == 0) {
|
|
|
|
completed = true;
|
2014-03-01 11:13:32 +01:00
|
|
|
logger.info("Rip completed!");
|
2014-03-02 03:08:16 +01:00
|
|
|
observer.update(this, new RipStatusMessage(STATUS.RIP_COMPLETE, new File(Utils.removeCWD(workingDir))));
|
2014-03-01 11:13:32 +01:00
|
|
|
observer.notifyAll();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-02-25 10:28:22 +01:00
|
|
|
public URL getURL() {
|
|
|
|
return url;
|
|
|
|
}
|
|
|
|
|
2014-02-27 04:54:44 +01:00
|
|
|
public void setWorkingDir(URL url) throws IOException {
|
|
|
|
String path = Utils.getWorkingDirectory().getCanonicalPath();
|
|
|
|
if (!path.endsWith(File.separator)) {
|
|
|
|
path += File.separator;
|
|
|
|
}
|
|
|
|
path += getHost() + "_" + getGID(this.url) + File.separator;
|
|
|
|
this.workingDir = new File(path);
|
|
|
|
if (!this.workingDir.exists()) {
|
2014-02-28 12:04:03 +01:00
|
|
|
logger.info("[+] Creating directory: " + Utils.removeCWD(this.workingDir));
|
2014-02-27 04:54:44 +01:00
|
|
|
this.workingDir.mkdirs();
|
|
|
|
}
|
|
|
|
logger.debug("Set working directory to: " + this.workingDir);
|
|
|
|
}
|
2014-03-01 11:13:32 +01:00
|
|
|
|
2014-02-27 10:28:23 +01:00
|
|
|
/**
|
|
|
|
* Finds, instantiates, and returns a compatible ripper for given URL.
|
|
|
|
* @param url
|
|
|
|
* URL to rip.
|
|
|
|
* @return
|
|
|
|
* Instantiated ripper ready to rip given URL.
|
|
|
|
* @throws Exception
|
|
|
|
* If no compatible rippers can be found.
|
|
|
|
*/
|
|
|
|
public static AbstractRipper getRipper(URL url) throws Exception {
|
2014-03-02 04:37:09 +01:00
|
|
|
for (Constructor<?> constructor : getRipperConstructors()) {
|
|
|
|
try {
|
|
|
|
AbstractRipper ripper = (AbstractRipper) constructor.newInstance(url);
|
|
|
|
return ripper;
|
|
|
|
} catch (Exception e) {
|
|
|
|
// Incompatible rippers *will* throw exceptions during instantiation.
|
|
|
|
}
|
|
|
|
}
|
2014-02-27 10:28:23 +01:00
|
|
|
throw new Exception("No compatible ripper found");
|
|
|
|
}
|
2014-03-02 04:37:09 +01:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets constructors for all rippers in the "ripper.rippers" package
|
|
|
|
* @return list of constructors for all rippers.
|
|
|
|
* @throws Exception
|
|
|
|
*/
|
|
|
|
private static List<Constructor<?>> getRipperConstructors() throws Exception {
|
|
|
|
List<Constructor<?>> constructors = new ArrayList<Constructor<?>>();
|
|
|
|
String rippersPackage = "com.rarchives.ripme.ripper.rippers";
|
|
|
|
ClassLoader cl = Thread.currentThread().getContextClassLoader();
|
|
|
|
Enumeration<URL> urls = cl.getResources(rippersPackage.replaceAll("\\.", "/"));
|
|
|
|
if (!urls.hasMoreElements()) {
|
|
|
|
return constructors;
|
|
|
|
}
|
|
|
|
URL classURL = urls.nextElement();
|
|
|
|
for (File f : new File(classURL.toURI()).listFiles()) {
|
|
|
|
String className = f.getName();
|
|
|
|
if (!className.endsWith(".class") || className.contains("$")) {
|
|
|
|
// Ignore non-class or nested classes.
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
className = className.substring(0, className.length() - 6); // Strip .class
|
|
|
|
String fqname = rippersPackage + "." + className;
|
|
|
|
Class<?> clazz = Class.forName(fqname);
|
|
|
|
constructors.add( (Constructor<?>) clazz.getConstructor(URL.class));
|
|
|
|
}
|
|
|
|
return constructors;
|
|
|
|
}
|
2014-03-01 11:13:32 +01:00
|
|
|
|
|
|
|
public void sendUpdate(STATUS status, Object message) {
|
2014-03-02 04:37:09 +01:00
|
|
|
if (observer == null) {
|
|
|
|
return;
|
|
|
|
}
|
2014-03-01 11:13:32 +01:00
|
|
|
synchronized (observer) {
|
|
|
|
observer.update(this, new RipStatusMessage(status, message));
|
|
|
|
observer.notifyAll();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-03-01 11:43:47 +01:00
|
|
|
public void run() {
|
|
|
|
try {
|
|
|
|
rip();
|
|
|
|
} catch (IOException e) {
|
|
|
|
logger.error("Got exception while running ripper:", e);
|
|
|
|
}
|
|
|
|
}
|
2014-02-26 08:44:22 +01:00
|
|
|
}
|