diff --git a/pom.xml b/pom.xml
index 294671a9..75bfff8c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -4,7 +4,7 @@
com.rarchives.ripme
ripme
jar
- 1.0.48
+ 1.0.49
ripme
http://rip.rarchives.com
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java
index 592a4cb7..4915a2e8 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java
@@ -14,12 +14,23 @@ import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AlbumRipper;
+import com.rarchives.ripme.ripper.DownloadThreadPool;
+import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Utils;
public class EHentaiRipper extends AlbumRipper {
+ // All sleep times are in milliseconds
+ private static final int PAGE_SLEEP_TIME = 3 * 1000;
+ private static final int IMAGE_SLEEP_TIME = 1 * 1000;
+ private static final int IP_BLOCK_SLEEP_TIME = 60 * 1000;
+
private static final String DOMAIN = "g.e-hentai.org", HOST = "e-hentai";
private static final Logger logger = Logger.getLogger(EHentaiRipper.class);
+ // Thread pool for finding direct image links from "image" pages (html)
+ private DownloadThreadPool ehentaiThreadPool = new DownloadThreadPool("ehentai");
+
+ // Current HTML document
private Document albumDoc = null;
public EHentaiRipper(URL url) throws IOException {
@@ -39,13 +50,18 @@ public class EHentaiRipper extends AlbumRipper {
try {
// Attempt to use album title as GID
if (albumDoc == null) {
- albumDoc = Jsoup.connect(url.toExternalForm()).get();
+ logger.info(" Retrieving " + url.toExternalForm());
+ sendUpdate(STATUS.LOADING_RESOURCE, url.toString());
+ albumDoc = Jsoup.connect(url.toExternalForm())
+ .userAgent(USER_AGENT)
+ .timeout(5000)
+ .get();
}
Elements elems = albumDoc.select("#gn");
return HOST + "_" + elems.get(0).text();
} catch (Exception e) {
// Fall back to default album naming convention
- e.printStackTrace();
+ logger.warn("Failed to get album title from " + url, e);
}
return super.getAlbumTitle(url);
}
@@ -55,8 +71,6 @@ public class EHentaiRipper extends AlbumRipper {
Pattern p;
Matcher m;
- System.out.println(url);
-
p = Pattern.compile("^.*g\\.e-hentai\\.org/g/([0-9]+)/([a-fA-F0-9]+)/$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
@@ -71,67 +85,76 @@ public class EHentaiRipper extends AlbumRipper {
@Override
public void rip() throws IOException {
- int index = 0;
- if (albumDoc == null) {
- logger.info(" Retrieving " + this.url.toExternalForm());
- albumDoc = Jsoup.connect(this.url.toExternalForm()).get();
- }
- Elements select = albumDoc.select("#gdt > .gdtm");
- Element first = select.first();
- String href = first.select("a").attr("href");
- if (href.equals("")) {
- throw new IOException("Could not find 'href' inside elements under #gdt > .gdtm > a");
- }
- URL cursorUrl = new URL(href), prevUrl = null;
-
- while (!cursorUrl.equals(prevUrl)) {
- prevUrl = cursorUrl;
- Document cursorDoc = Jsoup.connect(cursorUrl.toExternalForm())
- .userAgent(USER_AGENT)
- .get();
-
- Elements a = cursorDoc.select(".sni > a");
- Elements images = a.select("img");
- if (images.size() == 0) {
- logger.info("cursorDoc: " + cursorDoc.toString());
- logger.error("No images found at " + cursorUrl);
- break;
+ int index = 0, retries = 3;
+ String nextUrl = this.url.toExternalForm();
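+        // Iterate over the gallery pages, queuing a download thread for every thumbnail found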
+ while (true) {
+ if (albumDoc == null) {
+ logger.info(" Retrieving album page " + nextUrl);
+ sendUpdate(STATUS.LOADING_RESOURCE, nextUrl);
+ albumDoc = Jsoup.connect(nextUrl)
+ .userAgent(USER_AGENT)
+ .timeout(5000)
+ .referrer(this.url.toExternalForm())
+ .get();
}
-
- String imgsrc = images.get(0).attr("src");
- if (imgsrc.equals("")) {
- logger.warn("Image URL is empty via " + images.get(0));
+ // Check for rate limiting
+ if (albumDoc.toString().contains("IP address will be automatically banned")) {
+ if (retries == 0) {
+ logger.error("Hit rate limit and maximum number of retries, giving up");
+ break;
+ }
+ logger.warn("Hit rate limit while loading " + nextUrl + ", sleeping for " + IP_BLOCK_SLEEP_TIME + "ms, " + retries + " retries remaining");
+ retries--;
+ try {
+ Thread.sleep(IP_BLOCK_SLEEP_TIME);
+ } catch (InterruptedException e) {
+ logger.error("Interrupted while waiting for rate limit to subside", e);
+ break;
+ }
+ albumDoc = null;
continue;
}
- logger.info("Found URL " + imgsrc + " via " + images.get(0));
- Pattern p = Pattern.compile("^http://.*/ehg/image.php.*&n=([^&]+).*$");
- Matcher m = p.matcher(imgsrc);
- if (m.matches()) {
- // Manually discover filename from URL
- String savePath = this.workingDir + File.separator;
- if (Utils.getConfigBoolean("download.save_order", true)) {
- savePath += String.format("%03d_", index + 1);
- }
- savePath += m.group(1);
- addURLToDownload(new URL(imgsrc), new File(savePath));
- }
- else {
- // Provide prefix and let the AbstractRipper "guess" the filename
- String prefix = "";
- if (Utils.getConfigBoolean("download.save_order", true)) {
- prefix = String.format("%03d_", index + 1);
- }
- addURLToDownload(new URL(imgsrc), prefix);
- }
-
- String nextUrl = a.attr("href");
- if (nextUrl.equals("")) {
- logger.warn("Next page URL is empty, via " + a);
+ // Find thumbnails
+ Elements thumbs = albumDoc.select("#gdt > .gdtm a");
+ if (thumbs.size() == 0) {
+ logger.info("albumDoc: " + albumDoc);
+ logger.info("No images found at " + nextUrl);
break;
}
- cursorUrl = new URL(nextUrl);
+ // Iterate over images on page
+ for (Element thumb : thumbs) {
+ index++;
+ EHentaiImageThread t = new EHentaiImageThread(new URL(thumb.attr("href")), index, this.workingDir);
+ ehentaiThreadPool.addThread(t);
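+                // Pause between image pages so we don't trip the site's rate limiter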
+ try {
+ Thread.sleep(IMAGE_SLEEP_TIME);
+ } catch (InterruptedException e) {
+ logger.warn("Interrupted while waiting to load next image", e);
+ }
+ }
+ // Find next page
+ Elements hrefs = albumDoc.select(".ptt a");
+ if (hrefs.size() == 0) {
+ logger.info("No navigation links found at " + nextUrl);
+ break;
+ }
+ // Ensure next page is different from the current page
+ String lastUrl = nextUrl;
+ nextUrl = hrefs.last().attr("href");
+ if (lastUrl.equals(nextUrl)) {
+ break; // We're on the last page
+ }
- index++;
+            // Reset albumDoc so the next page is fetched on the next iteration
+ albumDoc = null;
+
+ // Sleep before loading next page
+ try {
+ Thread.sleep(PAGE_SLEEP_TIME);
+ } catch (InterruptedException e) {
+ logger.error("Interrupted while waiting to load next page", e);
+ break;
+ }
}
waitForThreads();
@@ -140,4 +163,90 @@ public class EHentaiRipper extends AlbumRipper {
public boolean canRip(URL url) {
return url.getHost().endsWith(DOMAIN);
}
+
+ /**
+     * Helper class that finds and downloads images linked from "image" pages
+ *
+     * Handles the case where the site has IP-banned the user.
+ */
+ private class EHentaiImageThread extends Thread {
+ private URL url;
+ private int index;
+ private File workingDir;
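+        // Remaining attempts after hitting the rate limiter before giving up on this image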
+ private int retries = 3;
+
+ public EHentaiImageThread(URL url, int index, File workingDir) {
+ super();
+ this.url = url;
+ this.index = index;
+ this.workingDir = workingDir;
+ }
+
+ @Override
+ public void run() {
+ fetchImage();
+ }
+
+ private void fetchImage() {
+ try {
+ Document doc = Jsoup.connect(this.url.toExternalForm())
+ .userAgent(USER_AGENT)
+ .timeout(5000)
+ .referrer(this.url.toExternalForm())
+ .get();
+ // Check for rate limit
+ if (doc.toString().contains("IP address will be automatically banned")) {
+ if (this.retries == 0) {
+ logger.error("Rate limited & ran out of retries, skipping image at " + this.url);
+ return;
+ }
+ logger.warn("Hit rate limit. Sleeping for " + IP_BLOCK_SLEEP_TIME + "ms");
+ try {
+ Thread.sleep(IP_BLOCK_SLEEP_TIME);
+ } catch (InterruptedException e) {
+ logger.error("Interrupted while waiting for rate limit to subside", e);
+ return;
+ }
+ this.retries--;
+ fetchImage(); // Re-attempt to download the image
+ return;
+ }
+
+ // Find image
+ Elements images = doc.select(".sni > a > img");
+ if (images.size() == 0) {
+                // Attempt to find the image elsewhere on the page (Issue #41)
+ images = doc.select("img#img");
+ if (images.size() == 0) {
+ logger.warn("Image not found at " + this.url);
+ return;
+ }
+ }
+ Element image = images.first();
+ String imgsrc = image.attr("src");
+ logger.info("Found URL " + imgsrc + " via " + images.get(0));
+            Pattern p = Pattern.compile("^http://.*/ehg/image\\.php.*&n=([^&]+).*$");
+ Matcher m = p.matcher(imgsrc);
+ if (m.matches()) {
+ // Manually discover filename from URL
+ String savePath = this.workingDir + File.separator;
+ if (Utils.getConfigBoolean("download.save_order", true)) {
+ savePath += String.format("%03d_", index);
+ }
+ savePath += m.group(1);
+ addURLToDownload(new URL(imgsrc), new File(savePath));
+ }
+ else {
+ // Provide prefix and let the AbstractRipper "guess" the filename
+ String prefix = "";
+ if (Utils.getConfigBoolean("download.save_order", true)) {
+ prefix = String.format("%03d_", index);
+ }
+ addURLToDownload(new URL(imgsrc), prefix);
+ }
+ } catch (IOException e) {
+ logger.error("[!] Exception while loading/parsing " + this.url, e);
+ }
+ }
+ }
}
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
index e5086b45..93c6d54f 100644
--- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
+++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
@@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils;
public class UpdateUtils {
private static final Logger logger = Logger.getLogger(UpdateUtils.class);
- private static final String DEFAULT_VERSION = "1.0.48";
+ private static final String DEFAULT_VERSION = "1.0.49";
private static final String updateJsonURL = "http://rarchives.com/ripme.json";
private static final String updateJarURL = "http://rarchives.com/ripme.jar";
private static final String mainFileName = "ripme.jar";