Added the ability to blacklist tags on e-hentai
This commit is contained in:
parent
d11dfe814b
commit
5554d1cc2f
@ -11,6 +11,7 @@ import java.util.Map;
|
|||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import com.rarchives.ripme.ui.RipStatusMessage;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
import org.jsoup.select.Elements;
|
import org.jsoup.select.Elements;
|
||||||
@ -125,12 +126,55 @@ public class EHentaiRipper extends AbstractHTMLRipper {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks for blacklisted tags on page. If it finds one it returns it, if not it return null
|
||||||
|
*
|
||||||
|
* @param doc
|
||||||
|
* @return String
|
||||||
|
*/
|
||||||
|
public String checkTags(Document doc, String[] blackListedTags) {
|
||||||
|
// If the user hasn't blacklisted any tags we return null;
|
||||||
|
if (blackListedTags == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
logger.info("Blacklisted tags " + blackListedTags[0]);
|
||||||
|
List<String> tagsOnPage = getTags(doc);
|
||||||
|
for (String tag : blackListedTags) {
|
||||||
|
for (String pageTag : tagsOnPage) {
|
||||||
|
// We replace all dashes in the tag with spaces because the tags we get from the site are separated using
|
||||||
|
// dashes
|
||||||
|
if (tag.trim().toLowerCase().equals(pageTag.toLowerCase())) {
|
||||||
|
return tag;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<String> getTags(Document doc) {
|
||||||
|
List<String> tags = new ArrayList<>();
|
||||||
|
logger.info("Getting tags");
|
||||||
|
for (Element tag : doc.select("td > div > a")) {
|
||||||
|
logger.info("Found tag " + tag.text());
|
||||||
|
tags.add(tag.text());
|
||||||
|
}
|
||||||
|
return tags;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Document getFirstPage() throws IOException {
|
public Document getFirstPage() throws IOException {
|
||||||
if (albumDoc == null) {
|
if (albumDoc == null) {
|
||||||
albumDoc = getPageWithRetries(this.url);
|
albumDoc = getPageWithRetries(this.url);
|
||||||
}
|
}
|
||||||
this.lastURL = this.url.toExternalForm();
|
this.lastURL = this.url.toExternalForm();
|
||||||
|
logger.info("Checking blacklist");
|
||||||
|
String blacklistedTag = checkTags(albumDoc, Utils.getConfigStringArray("ehentai.blacklist.tags"));
|
||||||
|
if (blacklistedTag != null) {
|
||||||
|
sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping " + url.toExternalForm() + " as it " +
|
||||||
|
"contains the blacklisted tag \"" + blacklistedTag + "\"");
|
||||||
|
return null;
|
||||||
|
}
|
||||||
return albumDoc;
|
return albumDoc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -10,4 +10,19 @@ public class EhentaiRipperTest extends RippersTest {
|
|||||||
EHentaiRipper ripper = new EHentaiRipper(new URL("https://e-hentai.org/g/1144492/e823bdf9a5/"));
|
EHentaiRipper ripper = new EHentaiRipper(new URL("https://e-hentai.org/g/1144492/e823bdf9a5/"));
|
||||||
testRipper(ripper);
|
testRipper(ripper);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Test the tag black listing
|
||||||
|
public void testTagBlackList() throws IOException {
|
||||||
|
URL url = new URL("https://e-hentai.org/g/1228503/1a2f455f96/");
|
||||||
|
EHentaiRipper ripper = new EHentaiRipper(url);
|
||||||
|
// Test multiple blacklisted tags
|
||||||
|
String[] tags = {"test", "one", "yuri"};
|
||||||
|
String blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags);
|
||||||
|
assertEquals("yuri", blacklistedTag);
|
||||||
|
|
||||||
|
// test tags with spaces in them
|
||||||
|
String[] tags2 = {"test", "one", "midnight on mars"};
|
||||||
|
blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags2);
|
||||||
|
assertEquals("midnight on mars", blacklistedTag);
|
||||||
|
}
|
||||||
}
|
}
|
Loading…
Reference in New Issue
Block a user