WordpressComicRipper: fix formatting

2017-06-06 10:25:27 -07:00 · 2017-06-06 10:25:27 -07:00 · b8294b8152
commit b8294b8152
parent 90a8ab6f81
1 changed files with 158 additions and 160 deletions
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/WordpressComicRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/WordpressComicRipper.java
@ -16,7 +16,7 @@ import com.rarchives.ripme.ripper.AbstractHTMLRipper;
 import com.rarchives.ripme.utils.Http;
 public class WordpressComicRipper extends AbstractHTMLRipper {
-    String pageTitle ="";
+    String pageTitle = "";
    public WordpressComicRipper(URL url) throws IOException {
        super(url);
@ -33,77 +33,77 @@ public class WordpressComicRipper extends AbstractHTMLRipper {
    // http://comics-xxx.com/republic-rendezvous-palcomix-star-wars-xxx/
    public static List<String> explicit_domains = Arrays.asList("www.totempole666.com",
-    "buttsmithy.com", "themonsterunderthebed.net", "prismblush.com", "www.konradokonski.com", "freeadultcomix.com",
+        "buttsmithy.com", "themonsterunderthebed.net", "prismblush.com", "www.konradokonski.com", "freeadultcomix.com",
-    "thisis.delvecomic.com", "comics-xxx.com");
+        "thisis.delvecomic.com", "comics-xxx.com");
        @Override
        public String getHost() {
            String host = url.toExternalForm().split("/")[2];
            return host;
        }
-        @Override
+    @Override
-        public String getDomain() {
+    public String getHost() {
-            String host = url.toExternalForm().split("/")[2];
+        String host = url.toExternalForm().split("/")[2];
-            return host;
+        return host;
-        }
+    }
-        @Override
+    @Override
-        public boolean canRip(URL url) {
+    public String getDomain() {
-            String url_name = url.toExternalForm();
+        String host = url.toExternalForm().split("/")[2];
-            if (explicit_domains.contains(url_name.split("/")[2])) {
+        return host;
-
+    }
                Pattern totempole666Pat = Pattern.compile("https?://www\\.totempole666.com/comic/([a-zA-Z0-9_-]*)/?$");
                Matcher totempole666Mat = totempole666Pat.matcher(url.toExternalForm());
                if (totempole666Mat.matches()) {
                    return true;
                }
                Pattern konradokonskiPat = Pattern.compile("https?://www.konradokonski.com/sawdust/comic/([a-zA-Z0-9_-]*)/?$");
                Matcher konradokonskiMat = konradokonskiPat.matcher(url.toExternalForm());
                if (konradokonskiMat.matches()) {
                    return true;
                }
                Pattern buttsmithyPat = Pattern.compile("https?://buttsmithy.com/archives/comic/([a-zA-Z0-9_-]*)/?$");
                Matcher buttsmithyMat = buttsmithyPat.matcher(url.toExternalForm());
                if (buttsmithyMat.matches()) {
                    return true;
                }
                Pattern theMonsterUnderTheBedPat = Pattern.compile("https?://themonsterunderthebed.net/\\?comic=([a-zA-Z0-9_-]*)/?$");
                Matcher theMonsterUnderTheBedMat = theMonsterUnderTheBedPat.matcher(url.toExternalForm());
                if (theMonsterUnderTheBedMat.matches()) {
                    return true;
                }
                Pattern prismblushPat = Pattern.compile("https?://prismblush.com/comic/([a-zA-Z0-9_-]*)/?$");
                Matcher prismblushMat = prismblushPat.matcher(url.toExternalForm());
                if (prismblushMat.matches()) {
                    return true;
                }
                Pattern freeadultcomixPat = Pattern.compile("https?://freeadultcomix.com/([a-zA-Z0-9_\\-]*)/?$");
                Matcher freeadultcomixMat = freeadultcomixPat.matcher(url.toExternalForm());
                if (freeadultcomixMat.matches()) {
                    return true;
                }
                Pattern thisisDelvecomicPat = Pattern.compile("https?://thisis.delvecomic.com/NewWP/comic/([a-zA-Z0-9_\\-]*)/?$");
                Matcher thisisDelvecomicMat = thisisDelvecomicPat.matcher(url.toExternalForm());
                if (thisisDelvecomicMat.matches()) {
                    return true;
                }
                Pattern comicsxxxPat = Pattern.compile("https?://comics-xxx.com/([a-zA-Z0-9_\\-]*)/?$");
                Matcher comicsxxxMat = comicsxxxPat.matcher(url.toExternalForm());
                if (comicsxxxMat.matches()) {
                    return true;
                }
    /**
     * Reports whether this ripper can handle the given URL.
     *
     * The host (third "/"-separated token of the URL's external form) must be
     * listed in {@code explicit_domains}, and the full URL must match one of
     * the per-site comic page patterns below.
     *
     * @param url the URL to test
     * @return {@code true} if the host is known and the URL matches one of the
     *         site patterns, {@code false} otherwise
     */
    @Override
    public boolean canRip(URL url) {
        String externalForm = url.toExternalForm();
        String hostPart = externalForm.split("/")[2];
        // Unknown hosts are rejected before any pattern is tried.
        if (!explicit_domains.contains(hostPart)) {
            return false;
        }

        // One accepted URL shape per supported site, tried in order.
        String[] comicUrlRegexes = {
            "https?://www\\.totempole666.com/comic/([a-zA-Z0-9_-]*)/?$",
            "https?://www.konradokonski.com/sawdust/comic/([a-zA-Z0-9_-]*)/?$",
            "https?://buttsmithy.com/archives/comic/([a-zA-Z0-9_-]*)/?$",
            "https?://themonsterunderthebed.net/\\?comic=([a-zA-Z0-9_-]*)/?$",
            "https?://prismblush.com/comic/([a-zA-Z0-9_-]*)/?$",
            "https?://freeadultcomix.com/([a-zA-Z0-9_\\-]*)/?$",
            "https?://thisis.delvecomic.com/NewWP/comic/([a-zA-Z0-9_\\-]*)/?$",
            "https?://comics-xxx.com/([a-zA-Z0-9_\\-]*)/?$"
        };
        for (String regex : comicUrlRegexes) {
            // matches() requires the whole URL to fit the pattern.
            if (Pattern.compile(regex).matcher(externalForm).matches()) {
                return true;
            }
        }
        return false;
    }
    @Override
    public String getAlbumTitle(URL url) throws MalformedURLException {
        Pattern totempole666Pat = Pattern.compile("(?:https?://)?(?:www\\.)?totempole666.com\\/comic/([a-zA-Z0-9_-]*)/?$");
@ -117,6 +117,7 @@ public class WordpressComicRipper extends AbstractHTMLRipper {
        if (buttsmithyMat.matches()) {
            return "buttsmithy.com" + "_" + "Alfie";
        }
        Pattern konradokonskiSawdustPat = Pattern.compile("http://www.konradokonski.com/sawdust/comic/([a-zA-Z0-9_-]*)/?$");
        Matcher konradokonskiSawdustMat = konradokonskiSawdustPat.matcher(url.toExternalForm());
        if (konradokonskiSawdustMat.matches()) {
@ -154,110 +155,107 @@ public class WordpressComicRipper extends AbstractHTMLRipper {
        }
        return super.getAlbumTitle(url);
-}
+    }
-@Override
+    @Override
-public String getGID(URL url) throws MalformedURLException {
+    public String getGID(URL url) throws MalformedURLException {
-            String url_name = url.toExternalForm();
+        String url_name = url.toExternalForm();
-            // We shouldn't need to return any GID
+        // We shouldn't need to return any GID
-            if (explicit_domains.contains(url_name.split("/")[2])) {
+        if (explicit_domains.contains(url_name.split("/")[2])) {
-                return "";
+            return "";
        }
        throw new MalformedURLException("You should never see this error message");
    }
    @Override
    public Document getNextPage(Document doc) throws IOException {
        // Find next page
        String nextPage = "";
        Element elem = null;
        if (getHost().contains("www.totempole666.com")
                || getHost().contains("buttsmithy.com")
                || getHost().contains("themonsterunderthebed.net")
                || getHost().contains("prismblush.com")
                || getHost().contains("www.konradokonski.com")
                || getHost().contains("thisis.delvecomic.com")) {
            elem = doc.select("a.comic-nav-next").first();
            if (elem == null) {
                throw new IOException("No more pages");
            }
-            throw new MalformedURLException("You should never see this error message");
+            nextPage = elem.attr("href");
        }
        if (nextPage == "") {
            throw new IOException("No more pages");
        } else {
            return Http.url(nextPage).get();
        }
    }
    @Override
    public List<String> getURLsFromPage(Document doc) {
        List<String> result = new ArrayList<String>();
        if (getHost().contains("www.totempole666.com")
                || getHost().contains("buttsmithy.com")
                || getHost().contains("themonsterunderthebed.net")
                || getHost().contains("prismblush.com")
                || getHost().contains("www.konradokonski.com")
                || getHost().contains("thisis.delvecomic.com")) {
            Element elem = doc.select("div.comic-table > div#comic > a > img").first();
            // If doc is the last page in the comic then elem.attr("src") returns null
            // because there is no link <a> to the next page
            if (elem == null) {
                elem = doc.select("div.comic-table > div#comic > img").first();
            }
            // Check if this is a site where we can get the page number from the title
            if (url.toExternalForm().contains("buttsmithy.com")) {
                // Set the page title
                pageTitle = doc.select("meta[property=og:title]").attr("content");
                pageTitle = pageTitle.replace(" ", "");
                pageTitle = pageTitle.replace("P", "p");
            }
            if (url.toExternalForm().contains("www.totempole666.com")) {
                String postDate = doc.select("span.post-date").first().text().replaceAll("/", "_");
                String postTitle = doc.select("h2.post-title").first().text().replaceAll("#", "");
                pageTitle = postDate + "_" + postTitle;
            }
            if (url.toExternalForm().contains("themonsterunderthebed.net")) {
                pageTitle = doc.select("title").first().text().replaceAll("#", "");
                pageTitle = pageTitle.replace("“", "");
                pageTitle = pageTitle.replace("”", "");
                pageTitle = pageTitle.replace("The Monster Under the Bed", "");
                pageTitle = pageTitle.replace("–", "");
                pageTitle = pageTitle.replace(",", "");
                pageTitle = pageTitle.replace(" ", "");
            }
            result.add(elem.attr("src"));
        }
-        @Override
+        // freeadultcomix gets its own if because it needs to add http://freeadultcomix.com to the start of each link
-        public Document getNextPage(Document doc) throws IOException {
+        if (url.toExternalForm().contains("freeadultcomix.com")) {
-            // Find next page
+            for (Element elem : doc.select("div.single-post > p > img.aligncenter")) {
-            String nextPage = "";
+                result.add("http://freeadultcomix.com" + elem.attr("src"));
            Element elem = null;
            if (getHost().contains("www.totempole666.com")
            || getHost().contains("buttsmithy.com")
            || getHost().contains("themonsterunderthebed.net")
            || getHost().contains("prismblush.com")
            || getHost().contains("www.konradokonski.com")
            || getHost().contains("thisis.delvecomic.com")) {
                elem = doc.select("a.comic-nav-next").first();
                if (elem == null) {
                    throw new IOException("No more pages");
                }
                nextPage = elem.attr("href");
            }
                if (nextPage == "") {
                    throw new IOException("No more pages");
                }
                else {
                    return Http.url(nextPage).get();
                }
            }
        }
-        @Override
+        if (url.toExternalForm().contains("comics-xxx.com")) {
-        public List<String> getURLsFromPage(Document doc) {
+            for (Element elem : doc.select("div.single-post > center > p > img")) {
            List<String> result = new ArrayList<String>();
            if (getHost().contains("www.totempole666.com")
            || getHost().contains("buttsmithy.com")
            || getHost().contains("themonsterunderthebed.net")
            || getHost().contains("prismblush.com")
            || getHost().contains("www.konradokonski.com")
            || getHost().contains("thisis.delvecomic.com")) {
                Element elem = doc.select("div.comic-table > div#comic > a > img").first();
                // If doc is the last page in the comic then elem.attr("src") returns null
                // because there is no link <a> to the next page
                if (elem == null) {
                    elem = doc.select("div.comic-table > div#comic > img").first();
                }
                // Check if this is a site where we can get the page number from the title
                if (url.toExternalForm().contains("buttsmithy.com")) {
                    // Set the page title
                    pageTitle = doc.select("meta[property=og:title]").attr("content");
                    pageTitle = pageTitle.replace(" ", "");
                    pageTitle = pageTitle.replace("P", "p");
                }
                if (url.toExternalForm().contains("www.totempole666.com")) {
                    String postDate = doc.select("span.post-date").first().text().replaceAll("/", "_");
                    String postTitle = doc.select("h2.post-title").first().text().replaceAll("#", "");
                    pageTitle = postDate + "_" + postTitle;
                }
                if (url.toExternalForm().contains("themonsterunderthebed.net")) {
                    pageTitle = doc.select("title").first().text().replaceAll("#", "");
                    pageTitle = pageTitle.replace("“", "");
                    pageTitle = pageTitle.replace("”", "");
                    pageTitle = pageTitle.replace("The Monster Under the Bed", "");
                    pageTitle = pageTitle.replace("–", "");
                    pageTitle = pageTitle.replace(",", "");
                    pageTitle = pageTitle.replace(" ", "");
                }
                result.add(elem.attr("src"));
            }
            // freeadultcomix gets its own if because it needs to add http://freeadultcomix.com to the start of each link
            if (url.toExternalForm().contains("freeadultcomix.com")) {
                for (Element elem : doc.select("div.single-post > p > img.aligncenter")) {
                    result.add("http://freeadultcomix.com" + elem.attr("src"));
                }
            }
            if (url.toExternalForm().contains("comics-xxx.com")) {
                for (Element elem : doc.select("div.single-post > center > p > img")) {
                    result.add(elem.attr("src"));
                }
            }
            return result;
        }
        return result;
    }
-        @Override
+    @Override
-        public void downloadURL(URL url, int index) {
+    public void downloadURL(URL url, int index) {
-            // Download the url with the page title as the prefix
+        // Download the url with the page title as the prefix
-            // so we can download them in any order (And don't have to rerip the whole site to update the local copy)
+        // so we can download them in any order (And don't have to rerip the whole site to update the local copy)
-            if (getHost().contains("buttsmithy.com")
+        if (getHost().contains("buttsmithy.com")
-            || getHost().contains("www.totempole666.com")
+                || getHost().contains("www.totempole666.com")
-            || getHost().contains("themonsterunderthebed.net")) {
+                || getHost().contains("themonsterunderthebed.net")) {
-                addURLToDownload(url, pageTitle + "_");
+            addURLToDownload(url, pageTitle + "_");
-            }
+        }
-            // If we're ripping a site where we can't get the page number/title we just rip normally
+        // If we're ripping a site where we can't get the page number/title we just rip normally
-            addURLToDownload(url, getPrefix(index));
+        addURLToDownload(url, getPrefix(index));
    }
    @Override