Merge pull request #941 from cyian-1756/tagBlackListRefactoring

Tag black list refactoring
This commit is contained in:
cyian-1756 2018-09-14 14:13:59 -04:00 committed by GitHub
commit e8b65ab7d8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 21 additions and 61 deletions

View File

@ -12,6 +12,7 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import com.rarchives.ripme.ui.RipStatusMessage; import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.utils.RipUtils;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
@ -126,32 +127,7 @@ public class EHentaiRipper extends AbstractHTMLRipper {
} }
} }
/** public List<String> getTags(Document doc) {
* Checks for blacklisted tags on page. If it finds one it returns it; if not, it returns null.
*
* @param doc
* @return String
*/
public String checkTags(Document doc, String[] blackListedTags) {
// If the user hasn't blacklisted any tags we return null;
if (blackListedTags == null) {
return null;
}
LOGGER.info("Blacklisted tags " + blackListedTags[0]);
List<String> tagsOnPage = getTags(doc);
for (String tag : blackListedTags) {
for (String pageTag : tagsOnPage) {
// The blacklisted tag is trimmed and both sides are lower-cased before comparing.
// NOTE: unlike what this comment previously claimed, no dash-to-space replacement is applied to the page tag here
if (tag.trim().toLowerCase().equals(pageTag.toLowerCase())) {
return tag;
}
}
}
return null;
}
private List<String> getTags(Document doc) {
List<String> tags = new ArrayList<>(); List<String> tags = new ArrayList<>();
LOGGER.info("Getting tags"); LOGGER.info("Getting tags");
for (Element tag : doc.select("td > div > a")) { for (Element tag : doc.select("td > div > a")) {
@ -169,7 +145,7 @@ public class EHentaiRipper extends AbstractHTMLRipper {
} }
this.lastURL = this.url.toExternalForm(); this.lastURL = this.url.toExternalForm();
LOGGER.info("Checking blacklist"); LOGGER.info("Checking blacklist");
String blacklistedTag = checkTags(albumDoc, Utils.getConfigStringArray("ehentai.blacklist.tags")); String blacklistedTag = RipUtils.checkTags(Utils.getConfigStringArray("ehentai.blacklist.tags"), getTags(albumDoc));
if (blacklistedTag != null) { if (blacklistedTag != null) {
sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping " + url.toExternalForm() + " as it " + sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping " + url.toExternalForm() + " as it " +
"contains the blacklisted tag \"" + blacklistedTag + "\""); "contains the blacklisted tag \"" + blacklistedTag + "\"");

View File

@ -4,6 +4,7 @@ import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool; import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.ui.RipStatusMessage; import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.RipUtils;
import com.rarchives.ripme.utils.Utils; import com.rarchives.ripme.utils.Utils;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
@ -82,39 +83,16 @@ public class NhentaiRipper extends AbstractHTMLRipper {
return "nhentai" + title; return "nhentai" + title;
} }
private List<String> getTags(Document doc) { public List<String> getTags(Document doc) {
List<String> tags = new ArrayList<>(); List<String> tags = new ArrayList<>();
for (Element tag : doc.select("a.tag")) { for (Element tag : doc.select("a.tag")) {
tags.add(tag.attr("href").replaceAll("/tag/", "").replaceAll("/", "")); String tagString = tag.attr("href").replaceAll("/tag/", "").replaceAll("/", "");
LOGGER.info("Found tag: " + tagString);
tags.add(tagString);
} }
return tags; return tags;
} }
/**
* Checks for blacklisted tags on page. If it finds one it returns it; if not, it returns null.
*
* @param doc
* @return String
*/
public String checkTags(Document doc, String[] blackListedTags) {
// If the user hasn't blacklisted any tags we return null;
if (blackListedTags == null) {
return null;
}
LOGGER.info("Blacklisted tags " + blackListedTags[0]);
List<String> tagsOnPage = getTags(doc);
for (String tag : blackListedTags) {
for (String pageTag : tagsOnPage) {
// We replace all dashes in the tag with spaces because the tags we get from the site are separated using
// dashes
if (tag.trim().toLowerCase().equals(pageTag.replaceAll("-", " ").toLowerCase())) {
return tag;
}
}
}
return null;
}
@Override @Override
public String getGID(URL url) throws MalformedURLException { public String getGID(URL url) throws MalformedURLException {
// Ex: https://nhentai.net/g/159174/ // Ex: https://nhentai.net/g/159174/
@ -134,7 +112,7 @@ public class NhentaiRipper extends AbstractHTMLRipper {
firstPage = Http.url(url).get(); firstPage = Http.url(url).get();
} }
String blacklistedTag = checkTags(firstPage, Utils.getConfigStringArray("nhentai.blacklist.tags")); String blacklistedTag = RipUtils.checkTags(Utils.getConfigStringArray("nhentai.blacklist.tags"), getTags(firstPage));
if (blacklistedTag != null) { if (blacklistedTag != null) {
sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping " + url.toExternalForm() + " as it " + sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping " + url.toExternalForm() + " as it " +
"contains the blacklisted tag \"" + blacklistedTag + "\""); "contains the blacklisted tag \"" + blacklistedTag + "\"");

View File

@ -2,8 +2,10 @@ package com.rarchives.ripme.tst.ripper.rippers;
import java.io.IOException; import java.io.IOException;
import java.net.URL; import java.net.URL;
import java.util.List;
import com.rarchives.ripme.ripper.rippers.EHentaiRipper; import com.rarchives.ripme.ripper.rippers.EHentaiRipper;
import com.rarchives.ripme.utils.RipUtils;
public class EhentaiRipperTest extends RippersTest { public class EhentaiRipperTest extends RippersTest {
public void testEHentaiAlbum() throws IOException { public void testEHentaiAlbum() throws IOException {
@ -15,14 +17,15 @@ public class EhentaiRipperTest extends RippersTest {
public void testTagBlackList() throws IOException { public void testTagBlackList() throws IOException {
URL url = new URL("https://e-hentai.org/g/1228503/1a2f455f96/"); URL url = new URL("https://e-hentai.org/g/1228503/1a2f455f96/");
EHentaiRipper ripper = new EHentaiRipper(url); EHentaiRipper ripper = new EHentaiRipper(url);
List<String> tagsOnPage = ripper.getTags(ripper.getFirstPage());
// Test multiple blacklisted tags // Test multiple blacklisted tags
String[] tags = {"test", "one", "yuri"}; String[] tags = {"test", "one", "yuri"};
String blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags); String blacklistedTag = RipUtils.checkTags(tags, tagsOnPage);
assertEquals("yuri", blacklistedTag); assertEquals("yuri", blacklistedTag);
// test tags with spaces in them // test tags with spaces in them
String[] tags2 = {"test", "one", "midnight on mars"}; String[] tags2 = {"test", "one", "midnight on mars"};
blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags2); blacklistedTag = RipUtils.checkTags(tags2, tagsOnPage);
assertEquals("midnight on mars", blacklistedTag); assertEquals("midnight on mars", blacklistedTag);
} }
} }

View File

@ -2,8 +2,10 @@ package com.rarchives.ripme.tst.ripper.rippers;
import java.io.IOException; import java.io.IOException;
import java.net.URL; import java.net.URL;
import java.util.List;
import com.rarchives.ripme.ripper.rippers.NhentaiRipper; import com.rarchives.ripme.ripper.rippers.NhentaiRipper;
import com.rarchives.ripme.utils.RipUtils;
public class NhentaiRipperTest extends RippersTest { public class NhentaiRipperTest extends RippersTest {
public void testRip() throws IOException { public void testRip() throws IOException {
@ -20,14 +22,15 @@ public class NhentaiRipperTest extends RippersTest {
public void testTagBlackList() throws IOException { public void testTagBlackList() throws IOException {
URL url = new URL("https://nhentai.net/g/233295/"); URL url = new URL("https://nhentai.net/g/233295/");
NhentaiRipper ripper = new NhentaiRipper(url); NhentaiRipper ripper = new NhentaiRipper(url);
List<String> tagsOnPage = ripper.getTags(ripper.getFirstPage());
// Test multiple blacklisted tags // Test multiple blacklisted tags
String[] tags = {"test", "one", "blowjob"}; String[] tags = {"test", "one", "blowjob"};
String blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags); String blacklistedTag = RipUtils.checkTags(tags, tagsOnPage);
assertEquals("blowjob", blacklistedTag); assertEquals("blowjob", blacklistedTag);
// test tags with spaces in them // test tags with spaces in them
String[] tags2 = {"test", "one", "sole female"}; String[] tags2 = {"test", "one", "sole-female"};
blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags2); blacklistedTag = RipUtils.checkTags(tags2, tagsOnPage);
assertEquals("sole female", blacklistedTag); assertEquals("sole-female", blacklistedTag);
} }
} }