From aca2d24a65cf78bd4748a09fdb3c939da30d6a13 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 13 May 2018 12:12:23 -0400 Subject: [PATCH 1/3] Can now blacklist nhentai tags --- .../ripme/ripper/rippers/NhentaiRipper.java | 41 +++++++++++++++++++ .../java/com/rarchives/ripme/utils/Utils.java | 10 +++++ 2 files changed, 51 insertions(+) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java index ac8f782d..50334f05 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java @@ -2,6 +2,7 @@ package com.rarchives.ripme.ripper.rippers; import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.ripper.DownloadThreadPool; +import com.rarchives.ripme.ui.RipStatusMessage; import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; import org.jsoup.nodes.Document; @@ -64,6 +65,39 @@ public class NhentaiRipper extends AbstractHTMLRipper { return "nhentai" + title; } + private List getTags(Document doc) { + List tags = new ArrayList<>(); + for (Element tag : doc.select("a.tag")) { + tags.add(tag.attr("href").replaceAll("/tag/", "").replaceAll("/", "")); + } + return tags; + } + + /** + * Checks for blacklisted tags on page. If it finds one it returns it, if not it return null + * + * @param doc + * @return String + */ + private String checkTags(Document doc) { + String[] blackListedTags = Utils.getConfigStringArray("nhentai.blacklist.tags"); + // If the user hasn't blacklisted any tags we return false; + if (blackListedTags == null) { + return null; + } + logger.info("Blacklisted tags " + blackListedTags[0]); + List tagsOnPage = getTags(doc); + for (String tag : blackListedTags) { + for (String pageTag : tagsOnPage) { + logger.info("tag: " + tag + " pageTag: " + pageTag); + if (tag.trim().toLowerCase().equals(pageTag.toLowerCase())) { + return tag; + } + } + } + return null; + } + @Override public String getGID(URL url) throws MalformedURLException { // Ex: https://nhentai.net/g/159174/ @@ -82,6 +116,13 @@ public class NhentaiRipper extends AbstractHTMLRipper { if (firstPage == null) { firstPage = Http.url(url).get(); } + + String blacklistedTag = checkTags(firstPage); + if (blacklistedTag != null) { + sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping " + url.toExternalForm() + " as it " + + "contains the blacklisted tag \"" + blacklistedTag + "\""); + return null; + } return firstPage; } diff --git a/src/main/java/com/rarchives/ripme/utils/Utils.java b/src/main/java/com/rarchives/ripme/utils/Utils.java index cd1048df..5560a425 100644 --- a/src/main/java/com/rarchives/ripme/utils/Utils.java +++ b/src/main/java/com/rarchives/ripme/utils/Utils.java @@ -92,6 +92,16 @@ public class Utils { public static String getConfigString(String key, String defaultValue) { return config.getString(key, defaultValue); } + + public static String[] getConfigStringArray(String key) { + String[] s = config.getStringArray(key); + if (s.length == 0) { + return null; + } else { + return s; + } + } + public static int getConfigInteger(String key, int defaultValue) { return config.getInt(key, defaultValue); } From 05cc8cec615dc5dc9d0d5888011c18a77ab569d1 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Mon, 14 May 2018 03:26:32 -0400 Subject: [PATCH 2/3] Added unit test for nhentai --- .../tst/ripper/rippers/NhentaiRipperTest.java | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 src/test/java/com/rarchives/ripme/tst/ripper/rippers/NhentaiRipperTest.java diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/NhentaiRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/NhentaiRipperTest.java new file mode 100644 index 00000000..108feed2 --- /dev/null +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/NhentaiRipperTest.java @@ -0,0 +1,33 @@ +package com.rarchives.ripme.tst.ripper.rippers; + +import java.io.IOException; +import java.net.URL; + +import com.rarchives.ripme.ripper.rippers.NhentaiRipper; + +public class NhentaiRipperTest extends RippersTest { + public void testRip() throws IOException { + NhentaiRipper ripper = new NhentaiRipper(new URL("https://nhentai.net/g/233295/")); + testRipper(ripper); + } + + public void testGetGID() throws IOException { + NhentaiRipper ripper = new NhentaiRipper(new URL("https://nhentai.net/g/233295/")); + assertEquals("233295", ripper.getGID(new URL("https://nhentai.net/g/233295/"))); + } + + // Test the tag black listing + public void testTagBlackList() throws IOException { + URL url = new URL("https://nhentai.net/g/233295/"); + NhentaiRipper ripper = new NhentaiRipper(url); + // Test multiple blacklisted tags + String[] tags = {"test", "one", "blowjob"}; + String blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags); + assertEquals("blowjob", blacklistedTag); + + // test tags with spaces in them + String[] tags2 = {"test", "one", "sole female"}; + blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags2); + assertEquals("sole female", blacklistedTag); + } +} From 4dcfe3b712cd6b34f5279161c937e23ef3c5d911 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Mon, 14 May 2018 03:26:56 -0400 Subject: [PATCH 3/3] Can now black list tags with spaces in them --- .../rarchives/ripme/ripper/rippers/NhentaiRipper.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java index 50334f05..9c204a8d 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java @@ -79,8 +79,7 @@ public class NhentaiRipper extends AbstractHTMLRipper { * @param doc * @return String */ - private String checkTags(Document doc) { - String[] blackListedTags = Utils.getConfigStringArray("nhentai.blacklist.tags"); + public String checkTags(Document doc, String[] blackListedTags) { // If the user hasn't blacklisted any tags we return false; if (blackListedTags == null) { return null; @@ -89,8 +88,9 @@ public class NhentaiRipper extends AbstractHTMLRipper { List tagsOnPage = getTags(doc); for (String tag : blackListedTags) { for (String pageTag : tagsOnPage) { - logger.info("tag: " + tag + " pageTag: " + pageTag); - if (tag.trim().toLowerCase().equals(pageTag.toLowerCase())) { + // We replace all dashes in the tag with spaces because the tags we get from the site are separated using + // dashes + if (tag.trim().toLowerCase().equals(pageTag.replaceAll("-", " ").toLowerCase())) { return tag; } } @@ -117,7 +117,7 @@ public class NhentaiRipper extends AbstractHTMLRipper { firstPage = Http.url(url).get(); } - String blacklistedTag = checkTags(firstPage); + String blacklistedTag = checkTags(firstPage, Utils.getConfigStringArray("nhentai.blacklist.tags")); if (blacklistedTag != null) { sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping " + url.toExternalForm() + " as it " + "contains the blacklisted tag \"" + blacklistedTag + "\"");