From 3d30df196f40c3ffdbe3039f014929fa144e7803 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Tue, 22 May 2018 15:34:49 -0400 Subject: [PATCH 1/2] Rewrote nhentairipper to be faster --- .../ripme/ripper/rippers/NhentaiRipper.java | 76 +------------------ 1 file changed, 3 insertions(+), 73 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java index 9c204a8d..6b833941 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java @@ -20,9 +20,6 @@ import java.util.regex.Pattern; public class NhentaiRipper extends AbstractHTMLRipper { - // All sleep times are in milliseconds - private static final int IMAGE_SLEEP_TIME = 1500; - private String albumTitle; private Document firstPage; @@ -129,84 +126,17 @@ public class NhentaiRipper extends AbstractHTMLRipper { @Override public List getURLsFromPage(Document page) { List imageURLs = new ArrayList<>(); - Elements thumbs = page.select(".gallerythumb"); + Elements thumbs = page.select("a.gallerythumb > img"); for (Element el : thumbs) { - String imageUrl = el.attr("href"); - imageURLs.add("https://nhentai.net" + imageUrl); + imageURLs.add(el.attr("data-src").replaceAll("t\\.n", "i.n").replaceAll("t\\.", ".")); } return imageURLs; } @Override public void downloadURL(URL url, int index) { - NHentaiImageThread t = new NHentaiImageThread(url, index, this.workingDir); - nhentaiThreadPool.addThread(t); - try { - Thread.sleep(IMAGE_SLEEP_TIME); - } catch (InterruptedException e) { - logger.warn("Interrupted while waiting to load next image", e); - } + addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), null); } - private class NHentaiImageThread extends Thread { - private URL url; - private int index; - private File workingDir; - - NHentaiImageThread(URL url, int index, File workingDir) { - super(); - this.url = url; - this.index = index; - this.workingDir = workingDir; - } - - @Override - public void run() { - fetchImage(); - } - - private void fetchImage() { - try { - //Document doc = getPageWithRetries(this.url); - Document doc = Http.url(this.url).get(); - - // Find image - Elements images = doc.select("#image-container > a > img"); - if (images.size() == 0) { - // Attempt to find image elsewise (Issue #41) - images = doc.select("img#img"); - if (images.size() == 0) { - logger.warn("Image not found at " + this.url); - return; - } - } - Element image = images.first(); - String imgsrc = image.attr("src"); - logger.info("Found URL " + imgsrc + " via " + images.get(0)); - - Pattern p = Pattern.compile("^https?://i.nhentai.net/galleries/\\d+/(.+)$"); - Matcher m = p.matcher(imgsrc); - if (m.matches()) { - // Manually discover filename from URL - String savePath = this.workingDir + File.separator; - if (Utils.getConfigBoolean("download.save_order", true)) { - savePath += String.format("%03d_", index); - } - savePath += m.group(1); - addURLToDownload(new URL(imgsrc), new File(savePath)); - } else { - // Provide prefix and let the AbstractRipper "guess" the filename - String prefix = ""; - if (Utils.getConfigBoolean("download.save_order", true)) { - prefix = String.format("%03d_", index); - } - addURLToDownload(new URL(imgsrc), prefix); - } - } catch (IOException e) { - logger.error("[!] Exception while loading/parsing " + this.url, e); - } - } - - } } From ffdb5fc27d785b066299ab5bc74511d93d8c8fc5 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Tue, 22 May 2018 15:51:55 -0400 Subject: [PATCH 2/2] Added quickQueue support to nhentairipper --- .../ripme/ripper/rippers/NhentaiRipper.java | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java index 6b833941..5dc3fd93 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java @@ -26,6 +26,30 @@ public class NhentaiRipper extends AbstractHTMLRipper { // Thread pool for finding direct image links from "image" pages (html) private DownloadThreadPool nhentaiThreadPool = new DownloadThreadPool("nhentai"); + @Override + public boolean hasQueueSupport() { + return true; + } + + @Override + public boolean pageContainsAlbums(URL url) { + Pattern pa = Pattern.compile("^https?://nhentai\\.net/tag/([a-zA-Z0-9_\\-]+)/?"); + Matcher ma = pa.matcher(url.toExternalForm()); + if (ma.matches()) { + return true; + } + return false; + } + + @Override + public List getAlbumsToQueue(Document doc) { + List urlsToAddToQueue = new ArrayList<>(); + for (Element elem : doc.select("a.cover")) { + urlsToAddToQueue.add("https://" + getDomain() + elem.attr("href")); + } + return urlsToAddToQueue; + } + @Override public DownloadThreadPool getThreadPool() { return nhentaiThreadPool;