From 81f888efdcd721c7819cc865160f5e13facb2c4a Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Mon, 23 Oct 2017 10:31:08 -0400 Subject: [PATCH 1/2] hentai2read ripper now handles short comics and urls with and without a trailing / --- .../ripper/rippers/Hentai2readRipper.java | 115 ++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/Hentai2readRipper.java diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/Hentai2readRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/Hentai2readRipper.java new file mode 100644 index 00000000..8c8cf6f1 --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/Hentai2readRipper.java @@ -0,0 +1,115 @@ +package com.rarchives.ripme.ripper.rippers; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; + +public class Hentai2readRipper extends AbstractHTMLRipper { + String lastPage; + + public Hentai2readRipper(URL url) throws IOException { + super(url); + } + + @Override + public String getHost() { + return "hentai2read"; + } + + @Override + public String getDomain() { + return "hentai2read.com"; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("https://hentai2read\\.com/([a-zA-Z0-9_-]*)/?"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1); + } + throw new MalformedURLException("Expected hentai2read.com URL format: " + + "hbrowse.com/COMICID - got " + url + " instead"); + } + + @Override + public Document getFirstPage() throws IOException { + Document tempDoc; + // get the first page of the comic + if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) { + tempDoc = Http.url(url + "1").get(); + } else { + tempDoc = Http.url(url + "/1").get(); + } + for (Element el : tempDoc.select("ul.nav > li > a")) { + if (el.attr("href").startsWith("https://hentai2read.com/thumbnails/")) { + // Get the page with the thumbnails + return Http.url(el.attr("href")).get(); + } + } + throw new IOException("Unable to get first page"); + } + + @Override + public String getAlbumTitle(URL url) throws MalformedURLException { + try { + Document doc = getFirstPage(); + String title = doc.select("span[itemprop=title]").text(); + return getHost() + "_" + title; + } catch (Exception e) { + // Fall back to default album naming convention + logger.warn("Failed to get album title from " + url, e); + } + return super.getAlbumTitle(url); + } + + @Override + public List getURLsFromPage(Document doc) { + List result = new ArrayList(); + for (Element el : doc.select("div.block-content > div > div.img-container > a > img.img-responsive")) { + String imageURL = "https:" + el.attr("src"); + imageURL = imageURL.replace("hentaicdn.com", "static.hentaicdn.com"); + imageURL = imageURL.replace("thumbnails/", ""); + imageURL = imageURL.replace("tmb", ""); + result.add(imageURL); + } + return result; + } + + @Override + public Document getNextPage(Document doc) throws IOException { + // Find next page + String nextUrl = ""; + Element elem = doc.select("div.bg-white > ul.pagination > li > a").last(); + if (elem == null) { + throw new IOException("No more pages"); + } + nextUrl = elem.attr("href"); + // We use the global lastPage to check if we've already ripped this page + // and is so we quit as there are no more pages + if (nextUrl.equals(lastPage)) { + throw new IOException("No more pages"); + } + lastPage = nextUrl; + // Sleep for half a sec to avoid getting IP banned + sleep(500); + return Http.url(nextUrl).get(); + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } + } From a084ffe08bb5dab4f6d0cfb65433e61ae4f8fd26 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Wed, 1 Nov 2017 13:07:35 -0400 Subject: [PATCH 2/2] Fixed hentai2read ripper formatting --- .../com/rarchives/ripme/ripper/rippers/Hentai2readRipper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/Hentai2readRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/Hentai2readRipper.java index 8c8cf6f1..897d949b 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/Hentai2readRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/Hentai2readRipper.java @@ -106,7 +106,7 @@ public class Hentai2readRipper extends AbstractHTMLRipper { // Sleep for half a sec to avoid getting IP banned sleep(500); return Http.url(nextUrl).get(); - } + } @Override public void downloadURL(URL url, int index) {