Merge pull request #296 from metaprime/hushpix-consent

Format CheveretoRipper; implement AGREE_CONSENT cookie for hushpix; add CheveretoRipperTest
2017-11-29 03:37:53 -08:00 · 2017-11-29 03:37:53 -08:00 · 5c95fedd5c
commit 5c95fedd5c
parent 8e6ec3e3e3 d9dc00666f
2 changed files with 117 additions and 95 deletions
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/CheveretoRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/CheveretoRipper.java
@ -6,6 +6,8 @@ import java.net.URL;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

@ -16,105 +18,107 @@ import com.rarchives.ripme.ripper.AbstractHTMLRipper;
 import com.rarchives.ripme.utils.Http;

 public class CheveretoRipper extends AbstractHTMLRipper {
+    private static final Map<String, String> CONSENT_COOKIE; // cookie map attached to the page requests in this class
+    static {
+        CONSENT_COOKIE = new TreeMap<String, String>();
+        CONSENT_COOKIE.put("AGREE_CONSENT", "1"); // consent flag; the commit message says it was added for hushpix
+    }

    public CheveretoRipper(URL url) throws IOException {
-    super(url);
+        super(url);
    }

    private static List<String> explicit_domains_1 = Arrays.asList("hushpix.com", "tag-fox.com");
-        @Override
-        public String getHost() {
-            return url.toExternalForm().split("/")[2];
-        }
-
-        @Override
-        public String getDomain() {
-            return url.toExternalForm().split("/")[2];
-        }
-
-        @Override
-        public boolean canRip(URL url) {
-            String url_name = url.toExternalForm();
-            if (explicit_domains_1.contains(url_name.split("/")[2])) {
-                Pattern pa = Pattern.compile("(?:https?://)?(?:www\\.)?[a-z1-9-]*\\.[a-z1-9]*/album/([a-zA-Z1-9]*)/?$");
-                Matcher ma = pa.matcher(url.toExternalForm());
-                if (ma.matches()) {
-                    return true;
-                }
-            }
-            return false;
-        }
-
-        @Override
-        public String getAlbumTitle(URL url) throws MalformedURLException {
-            try {
-                // Attempt to use album title as GID
-                Element titleElement = getFirstPage().select("meta[property=og:title]").first();
-                String title = titleElement.attr("content");
-                title = title.substring(title.lastIndexOf('/') + 1);
-                return getHost() + "_" + title.trim();
-            } catch (IOException e) {
-                // Fall back to default album naming convention
-                logger.info("Unable to find title at " + url);
-            }
-            return super.getAlbumTitle(url);
-        }
-
-
-        @Override
-        public String getGID(URL url) throws MalformedURLException {
-            Pattern p = Pattern.compile("(?:https?://)?(?:www\\.)?[a-z1-9-]*\\.[a-z1-9]*/album/([a-zA-Z1-9]*)/?$");
-            Matcher m = p.matcher(url.toExternalForm());
-            if (m.matches()) {
-                return m.group(1);
-            }
-            throw new MalformedURLException("Expected chevereto URL format: " +
-                            "site.domain/album/albumName or site.domain/username/albums- got " + url + " instead");
-        }
-
-        @Override
-        public Document getFirstPage() throws IOException {
-            // "url" is an instance field of the superclass
-            return Http.url(url).get();
-        }
-
-        @Override
-        public Document getNextPage(Document doc) throws IOException {
-            // Find next page
-            String nextUrl = "";
-            // We use comic-nav-next to the find the next page
-            Element elem = doc.select("li.pagination-next > a").first();
-                if (elem == null) {
-                    throw new IOException("No more pages");
-                }
-                String nextPage = elem.attr("href");
-                // Some times this returns a empty string
-                // This for stops that
-                if (nextPage == "") {
-                    return null;
-                }
-                else {
-                    return Http.url(nextPage).get();
-                }
-            }
-
-        @Override
-        public List<String> getURLsFromPage(Document doc) {
-            List<String> result = new ArrayList<>();
-                for (Element el : doc.select("a.image-container > img")) {
-                    String imageSource = el.attr("src");
-                    // We remove the .md from images so we download the full size image
-                    // not the medium ones
-                    imageSource = imageSource.replace(".md", "");
-                    result.add(imageSource);
-                }
-            return result;
-        }
-
-        @Override
-        public void downloadURL(URL url, int index) {
-            addURLToDownload(url, getPrefix(index));
-        }
-

+    @Override
+    public String getHost() {
+        return url.toExternalForm().split("/")[2]; // "scheme://host/..." split on "/" puts the host part at index 2
    }
+
+    @Override
+    public String getDomain() {
+        return url.toExternalForm().split("/")[2]; // same expression, and therefore same value, as getHost()
+    }
+
+    @Override
+    public boolean canRip(URL url) {
+        // Only album URLs on the explicitly listed Chevereto hosts are accepted.
+        String url_name = url.toExternalForm();
+        if (!explicit_domains_1.contains(url_name.split("/")[2])) {
+            return false;
+        }
+        // Album ids may contain any ASCII letter or digit; the old [a-zA-Z1-9] class rejected ids with a 0.
+        Pattern pa = Pattern.compile("(?:https?://)?(?:www\\.)?[a-z1-9-]*\\.[a-z1-9]*/album/([a-zA-Z0-9]*)/?$");
+        Matcher ma = pa.matcher(url_name);
+        return ma.matches();
+    }
+
+    @Override
+    public String getAlbumTitle(URL url) throws MalformedURLException {
+        try {
+            // Prefer the page's og:title; without one, fall through to the default album naming.
+            Element titleElement = getFirstPage().select("meta[property=og:title]").first();
+            if (titleElement != null) {
+                String title = titleElement.attr("content");
+                return getHost() + "_" + title.substring(title.lastIndexOf('/') + 1).trim();
+            }
+        } catch (IOException e) {
+            logger.info("Unable to find title at " + url);
+        }
+        return super.getAlbumTitle(url);
+    }
+
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        // The GID is the album id captured after /album/; the pattern is the one canRip uses (digit 0 included).
+        Matcher m = Pattern.compile("(?:https?://)?(?:www\\.)?[a-z1-9-]*\\.[a-z1-9]*/album/([a-zA-Z0-9]*)/?$").matcher(url.toExternalForm());
+        if (m.matches()) {
+            return m.group(1);
+        }
+        throw new MalformedURLException("Expected chevereto URL format: " +
+                        "site.domain/album/albumName or site.domain/username/albums- got " + url + " instead");
+    }
+
+    @Override
+    public Document getFirstPage() throws IOException {
+        // "url" is an instance field of the superclass; the consent cookie map is attached to the request
+        return Http.url(url).cookies(CONSENT_COOKIE).get();
+    }
+
+    @Override
+    public Document getNextPage(Document doc) throws IOException {
+        // Follows the "next" pagination link of doc and fetches that page with the consent cookie.
+        // Throws IOException when the page has no such link; returns null when its href is empty.
+        // The next-page anchor is the <a> directly under li.pagination-next.
+        Element elem = doc.select("li.pagination-next > a").first();
+        if (elem == null) {
+            throw new IOException("No more pages");
+        }
+        String nextPage = elem.attr("href");
+        // Sometimes the href is an empty string, which leaves nothing to fetch.
+        // Compare by content: == only tests reference identity and can miss an empty href.
+        if (nextPage.isEmpty()) {
+            return null;
+        } else {
+            return Http.url(nextPage).cookies(CONSENT_COOKIE).get();
+        }
+    }
+
+    @Override
+    public List<String> getURLsFromPage(Document doc) {
+        // Collect one image URL per thumbnail <img> found inside an a.image-container link.
+        List<String> imageUrls = new ArrayList<>();
+        for (Element thumbnail : doc.select("a.image-container > img")) {
+            // The src points at the medium-size file, marked by ".md";
+            // stripping that marker gives the URL of the full-size image.
+            String fullSizeUrl = thumbnail.attr("src").replace(".md", "");
+            imageUrls.add(fullSizeUrl);
+        }
+        return imageUrls;
+    }
+
+    @Override
+    public void downloadURL(URL url, int index) {
+        addURLToDownload(url, getPrefix(index)); // forwards the URL together with the prefix getPrefix derives from index
+    }
+}
--- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/CheveretoRipperTest.java
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/CheveretoRipperTest.java
@ -0,0 +1,18 @@
+package com.rarchives.ripme.tst.ripper.rippers;
+
+import java.io.IOException;
+import java.net.URL;
+
+import com.rarchives.ripme.ripper.rippers.CheveretoRipper;
+
+public class CheveretoRipperTest extends RippersTest { // one testRipper run per host listed in CheveretoRipper
+    public void testHushpix() throws IOException { // hushpix.com album, requested over https
+        CheveretoRipper ripper = new CheveretoRipper(new URL("https://hushpix.com/album/gKcu"));
+        testRipper(ripper);
+    }
+
+    public void testTagFox() throws IOException { // tag-fox.com album, requested over http
+        CheveretoRipper ripper = new CheveretoRipper(new URL("http://tag-fox.com/album/Thjb"));
+        testRipper(ripper);
+    }
+}