From 5554d1cc2f41814360852bb75017d18212fa5503 Mon Sep 17 00:00:00 2001
From: cyian-1756
Date: Mon, 21 May 2018 20:16:31 -0400
Subject: [PATCH] Added the ability to blacklist tags on e-hentai

---
Hand-edited after export: added-line counts are unchanged, but the post-image blob id on the
EHentaiRipper.java index line no longer matches the edited content.

 .../ripme/ripper/rippers/EHentaiRipper.java   | 44 +++++++++++++++++++
 .../tst/ripper/rippers/EhentaiRipperTest.java | 15 +++++++
 2 files changed, 59 insertions(+)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java
index 8bd87cb2..beeb60b5 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java
@@ -11,6 +11,7 @@ import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import com.rarchives.ripme.ui.RipStatusMessage;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;
@@ -125,12 +126,55 @@ public class EHentaiRipper extends AbstractHTMLRipper {
         }
     }
 
+    /**
+     * Checks the page for blacklisted tags and returns the first blacklist entry that matches.
+     * @param doc page whose tags are read via getTags(Document)
+     * @param blackListedTags tags to look for; null or empty means nothing is blacklisted
+     * @return the matching blacklist entry exactly as supplied, or null if none matches
+     */
+    public String checkTags(Document doc, String[] blackListedTags) {
+        // Nothing to check when no tags are blacklisted; this also keeps the [0] access below in bounds
+        if (blackListedTags == null || blackListedTags.length == 0) {
+            return null;
+        }
+        logger.info("Blacklisted tags " + blackListedTags[0]);
+        List<String> tagsOnPage = getTags(doc);
+        for (String tag : blackListedTags) {
+            for (String pageTag : tagsOnPage) {
+                // Match case-insensitively on the trimmed blacklist entry; the original (untrimmed) entry is
+                // what gets returned
+                if (tag.trim().equalsIgnoreCase(pageTag)) {
+                    return tag;
+                }
+            }
+        }
+        return null;
+    }
+
+    /** Collects the text of each "td > div > a" element, which checkTags treats as the page's tags. */
+    private List<String> getTags(Document doc) {
+        List<String> tags = new ArrayList<>();
+        logger.info("Getting tags");
+        for (Element tag : doc.select("td > div > a")) {
+            logger.info("Found tag " + tag.text());
+            tags.add(tag.text());
+        }
+        return tags;
+    }
+
     @Override
     public Document getFirstPage() throws IOException {
         if (albumDoc == null) {
             albumDoc = getPageWithRetries(this.url);
         }
         this.lastURL = this.url.toExternalForm();
+        logger.info("Checking blacklist");
+        String blacklistedTag = checkTags(albumDoc, Utils.getConfigStringArray("ehentai.blacklist.tags"));
+        if (blacklistedTag != null) {
+            sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping " + url.toExternalForm() + " as it " +
+                    "contains the blacklisted tag \"" + blacklistedTag + "\"");
+            return null;
+        }
         return albumDoc;
     }
 
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EhentaiRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EhentaiRipperTest.java
index 00a3f8b6..cdab6b73 100644
--- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EhentaiRipperTest.java
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EhentaiRipperTest.java
@@ -10,4 +10,19 @@ public class EhentaiRipperTest extends RippersTest {
         EHentaiRipper ripper = new EHentaiRipper(new URL("https://e-hentai.org/g/1144492/e823bdf9a5/"));
         testRipper(ripper);
     }
+
+    // Test the tag black listing
+    public void testTagBlackList() throws IOException {
+        URL url = new URL("https://e-hentai.org/g/1228503/1a2f455f96/");
+        EHentaiRipper ripper = new EHentaiRipper(url);
+        // Test multiple blacklisted tags
+        String[] tags = {"test", "one", "yuri"};
+        String blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags);
+        assertEquals("yuri", blacklistedTag);
+
+        // test tags with spaces in them
+        String[] tags2 = {"test", "one", "midnight on mars"};
+        blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags2);
+        assertEquals("midnight on mars", blacklistedTag);
+    }
 }
\ No newline at end of file