1.0.66 Fixing GirlsOfDesire ripper #72

4pr0n 2014-06-12 23:12:34 -07:00
parent 58264cacca
commit 3d06bb51e2
4 changed files with 32 additions and 153 deletions

View File

@@ -4,7 +4,7 @@
<groupId>com.rarchives.ripme</groupId>
<artifactId>ripme</artifactId>
<packaging>jar</packaging>
<version>1.0.65</version>
<version>1.0.66</version>
<name>ripme</name>
<url>http://rip.rarchives.com</url>
<properties>

View File

@@ -1,6 +1,5 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
@@ -13,22 +12,15 @@ import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AlbumRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Utils;
public class GirlsOfDesireRipper extends AlbumRipper {
// All sleep times are in milliseconds
private static final int PAGE_SLEEP_TIME = 3 * 1000;
private static final int IMAGE_SLEEP_TIME = 1 * 1000;
private static final int IP_BLOCK_SLEEP_TIME = 60 * 1000;
private static final int TIMEOUT = 5 * 1000;
private static final int IMAGE_SLEEP_TIME = 100;
private static final int TIMEOUT = 5 * 1000;
private static final String DOMAIN = "girlsofdesire.org", HOST = "GirlsOfDesire";
// Thread pool for finding direct image links from "image" pages (html)
private DownloadThreadPool girlsOfDesireThreadPool = new DownloadThreadPool(HOST);
// Current HTML document
private Document albumDoc = null;
@@ -57,7 +49,7 @@ public class GirlsOfDesireRipper extends AlbumRipper {
.get();
}
Elements elems = albumDoc.select(".albumName");
return HOST + "_" + elems.get(0).text();
return HOST + "_" + elems.first().text();
} catch (Exception e) {
// Fall back to default album naming convention
logger.warn("Failed to get album title from " + url, e);
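For context on the one-line change in this hunk: on an empty .albumName selection, jsoup's Elements.get(0) throws IndexOutOfBoundsException, while Elements.first() returns null (so the following .text() call fails with a NullPointerException instead); in both cases the catch block above falls back to the default album naming convention. A minimal standalone illustration, assuming only jsoup on the classpath (the class name and HTML string are invented for this example):

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

public class FirstVsGetExample {
    public static void main(String[] args) {
        // A document with no .albumName element, to compare the two accessors.
        Document doc = Jsoup.parse("<html><body><p>no album name here</p></body></html>");
        Elements elems = doc.select(".albumName");

        System.out.println(elems.first()); // prints "null" on an empty selection
        try {
            elems.get(0);                  // throws on an empty selection
        } catch (IndexOutOfBoundsException e) {
            System.out.println("get(0) threw: " + e);
        }
    }
}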
@@ -84,69 +76,40 @@ public class GirlsOfDesireRipper extends AlbumRipper {
@Override
public void rip() throws IOException {
int index = 0, retries = 3;
String nextUrl = this.url.toExternalForm();
while (true) {
if (albumDoc == null) {
logger.info(" Retrieving album page " + nextUrl);
sendUpdate(STATUS.LOADING_RESOURCE, nextUrl);
albumDoc = Jsoup.connect(nextUrl)
.userAgent(USER_AGENT)
.timeout(TIMEOUT)
.referrer(this.url.toExternalForm())
.get();
}
// Find thumbnails
Elements thumbs = albumDoc.select("td.vtop > a > img");
if (thumbs.size() == 0) {
logger.info("No images found at " + nextUrl);
}
// Iterate over images on page
for (Element thumb : thumbs) {
if (isStopped()) {
break;
}
if (albumDoc == null) {
logger.info(" Retrieving album page " + nextUrl);
sendUpdate(STATUS.LOADING_RESOURCE, nextUrl);
albumDoc = Jsoup.connect(nextUrl)
.userAgent(USER_AGENT)
.timeout(TIMEOUT)
.referrer(this.url.toExternalForm())
.get();
}
// Convert thumbnail to full-size image
String imgSrc = thumb.attr("src");
imgSrc = imgSrc.replaceAll("_thumb\\.", ".");
URL imgUrl = new URL(url, imgSrc);
// Check for rate limiting
// TODO copied from EHentaiRipper - how does this need to work on GirlsOfDesire?
if (albumDoc.toString().contains("IP address will be automatically banned")) {
if (retries == 0) {
logger.error("Hit rate limit and maximum number of retries, giving up");
break;
}
logger.warn("Hit rate limit while loading " + nextUrl + ", sleeping for " + IP_BLOCK_SLEEP_TIME + "ms, " + retries + " retries remaining");
retries--;
try {
Thread.sleep(IP_BLOCK_SLEEP_TIME);
} catch (InterruptedException e) {
logger.error("Interrupted while waiting for rate limit to subside", e);
break;
}
albumDoc = null;
continue;
}
addURLToDownload(imgUrl, "", "", this.url.toExternalForm(), null);
// Find thumbnails
Elements thumbs = albumDoc.select("#box_10 > table a");
if (thumbs.size() == 0) {
logger.info("albumDoc: " + albumDoc);
logger.info("No images found at " + nextUrl);
break;
}
// Iterate over images on page
for (Element thumb : thumbs) {
if (isStopped()) {
break;
}
index++;
String imgSrc = thumb.attr("href");
URL imgUrl = new URL(url, imgSrc);
GirlsOfDesireImageThread t = new GirlsOfDesireImageThread(imgUrl, index, this.workingDir);
girlsOfDesireThreadPool.addThread(t);
try {
Thread.sleep(IMAGE_SLEEP_TIME);
} catch (InterruptedException e) {
logger.warn("Interrupted while waiting to load next image", e);
}
}
if (isStopped()) {
break;
try {
Thread.sleep(IMAGE_SLEEP_TIME);
} catch (InterruptedException e) {
logger.warn("Interrupted while waiting to load next image", e);
}
}
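Because the add/remove markers of this hunk were lost in the extracted view, the old thread-pool-based lines and the new direct-download lines of rip() are interleaved above. Since the per-image GirlsOfDesireImageThread and its DownloadThreadPool are deleted elsewhere in this commit, the version that survives is the direct-download one; reassembled from those lines it reads roughly as the sketch below. Indentation, the exact statement order, and anything outside the shown hunk (such as a final wait on queued downloads) are assumptions, not a verbatim copy of the file.

@Override
public void rip() throws IOException {
    String nextUrl = this.url.toExternalForm();
    if (albumDoc == null) {
        logger.info("    Retrieving album page " + nextUrl);
        sendUpdate(STATUS.LOADING_RESOURCE, nextUrl);
        albumDoc = Jsoup.connect(nextUrl)
                        .userAgent(USER_AGENT)
                        .timeout(TIMEOUT)
                        .referrer(this.url.toExternalForm())
                        .get();
    }
    // Find thumbnails on the album page
    Elements thumbs = albumDoc.select("td.vtop > a > img");
    if (thumbs.size() == 0) {
        logger.info("No images found at " + nextUrl);
    }
    // Queue each image, pausing IMAGE_SLEEP_TIME between requests
    for (Element thumb : thumbs) {
        if (isStopped()) {
            break;
        }
        // Convert the thumbnail URL to the full-size image URL
        String imgSrc = thumb.attr("src");
        imgSrc = imgSrc.replaceAll("_thumb\\.", ".");
        URL imgUrl = new URL(url, imgSrc);
        addURLToDownload(imgUrl, "", "", this.url.toExternalForm(), null);
        try {
            Thread.sleep(IMAGE_SLEEP_TIME);
        } catch (InterruptedException e) {
            logger.warn("Interrupted while waiting to load next image", e);
        }
    }
    // (method tail lies outside the shown hunk)
}

The rate-limit retry and the index-prefixed filenames handled by the helper class removed in the next hunk do not reappear in this simplified version.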
@@ -156,87 +119,4 @@ public class GirlsOfDesireRipper extends AlbumRipper {
public boolean canRip(URL url) {
return url.getHost().endsWith(DOMAIN);
}
/**
* Helper class to find and download images found on "image" pages
*
* Handles case when site has IP-banned the user.
*/
private class GirlsOfDesireImageThread extends Thread {
private URL url;
private int index;
private File workingDir;
private int retries = 3;
public GirlsOfDesireImageThread(URL url, int index, File workingDir) {
super();
this.url = url;
this.index = index;
this.workingDir = workingDir;
}
@Override
public void run() {
fetchImage();
}
private void fetchImage() {
try {
Document doc = Jsoup.connect(this.url.toExternalForm())
.userAgent(USER_AGENT)
.timeout(TIMEOUT)
.referrer(this.url.toExternalForm())
.get();
// Check for rate limit
// TODO copied from EHentaiRipper - how does this need to work on GirlsOfDesire?
if (doc.toString().contains("IP address will be automatically banned")) {
if (this.retries == 0) {
logger.error("Rate limited & ran out of retries, skipping image at " + this.url);
return;
}
logger.warn("Hit rate limit. Sleeping for " + IP_BLOCK_SLEEP_TIME + "ms");
try {
Thread.sleep(IP_BLOCK_SLEEP_TIME);
} catch (InterruptedException e) {
logger.error("Interrupted while waiting for rate limit to subside", e);
return;
}
this.retries--;
fetchImage(); // Re-attempt to download the image
return;
}
// Find image
Elements divs = doc.select("#box_12 > div");
Element div = divs.get(1);
Element image = div.select("a > img").first();
String imgsrc = image.attr("src");
URL imgUrl = new URL(url, imgsrc);
logger.info("Found URL " + imgUrl.toExternalForm() + " via " + image);
Pattern p = Pattern.compile("^http://.*/([\\d]+).jpg$"); // TODO only compile this regex once
Matcher m = p.matcher(imgsrc);
if (m.matches()) {
// Manually discover filename from URL
String savePath = this.workingDir + File.separator;
if (Utils.getConfigBoolean("download.save_order", true)) {
savePath += String.format("%03d_", index);
}
savePath += m.group(1);
addURLToDownload(imgUrl, new File(savePath));
}
else {
// Provide prefix and let the AbstractRipper "guess" the filename
String prefix = "";
if (Utils.getConfigBoolean("download.save_order", true)) {
prefix = String.format("%03d_", index);
}
addURLToDownload(imgUrl, prefix);
}
} catch (IOException e) {
logger.error("[!] Exception while loading/parsing " + this.url, e);
}
}
}
}
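The deleted helper above resolved each thumbnail link to its full-size image by loading the intermediate HTML page, sleeping for IP_BLOCK_SLEEP_TIME whenever the site served its ban warning. Stripped of the ripme-specific plumbing, that retry pattern amounts to the following self-contained sketch; the class and method names are invented for illustration, and only the ban-warning string, timeout, and sleep interval are carried over from the deleted code.

import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

public class RateLimitedFetch {
    // Values carried over from the deleted constants
    private static final int IP_BLOCK_SLEEP_TIME = 60 * 1000;
    private static final int TIMEOUT = 5 * 1000;

    // Fetch a page, sleeping and retrying while the site serves its ban warning.
    public static Document fetchWithRetries(String pageUrl, String userAgent, int retries) throws IOException {
        while (true) {
            Document doc = Jsoup.connect(pageUrl)
                                .userAgent(userAgent)
                                .timeout(TIMEOUT)
                                .get();
            if (!doc.toString().contains("IP address will be automatically banned")) {
                return doc;
            }
            if (retries-- <= 0) {
                throw new IOException("Rate limited and out of retries: " + pageUrl);
            }
            try {
                Thread.sleep(IP_BLOCK_SLEEP_TIME);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new IOException("Interrupted while waiting out rate limit", e);
            }
        }
    }
}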

View File

@@ -8,7 +8,6 @@ import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.VideoRipper;

View File

@@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils;
public class UpdateUtils {
private static final Logger logger = Logger.getLogger(UpdateUtils.class);
private static final String DEFAULT_VERSION = "1.0.65";
private static final String DEFAULT_VERSION = "1.0.66";
private static final String updateJsonURL = "http://rarchives.com/ripme.json";
private static final String updateJarURL = "http://rarchives.com/ripme.jar";
private static final String mainFileName = "ripme.jar";