Merge pull request #941 from cyian-1756/tagBlackListRefactoring
Tag black list refactoring
This commit is contained in:
commit
e8b65ab7d8
@ -12,6 +12,7 @@ import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.rarchives.ripme.ui.RipStatusMessage;
|
||||
import com.rarchives.ripme.utils.RipUtils;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
@ -126,32 +127,7 @@ public class EHentaiRipper extends AbstractHTMLRipper {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks for blacklisted tags on the page. If it finds one it returns it; if not, it returns null.
|
||||
*
|
||||
* @param doc
|
||||
* @return String
|
||||
*/
|
||||
public String checkTags(Document doc, String[] blackListedTags) {
|
||||
// If the user hasn't blacklisted any tags we return null;
|
||||
if (blackListedTags == null) {
|
||||
return null;
|
||||
}
|
||||
LOGGER.info("Blacklisted tags " + blackListedTags[0]);
|
||||
List<String> tagsOnPage = getTags(doc);
|
||||
for (String tag : blackListedTags) {
|
||||
for (String pageTag : tagsOnPage) {
|
||||
// Compare case-insensitively, trimming whitespace around the blacklisted tag; page tags are used as-is
|
||||
// (no dash-to-space replacement is done in this comparison)
|
||||
if (tag.trim().toLowerCase().equals(pageTag.toLowerCase())) {
|
||||
return tag;
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private List<String> getTags(Document doc) {
|
||||
public List<String> getTags(Document doc) {
|
||||
List<String> tags = new ArrayList<>();
|
||||
LOGGER.info("Getting tags");
|
||||
for (Element tag : doc.select("td > div > a")) {
|
||||
@ -169,7 +145,7 @@ public class EHentaiRipper extends AbstractHTMLRipper {
|
||||
}
|
||||
this.lastURL = this.url.toExternalForm();
|
||||
LOGGER.info("Checking blacklist");
|
||||
String blacklistedTag = checkTags(albumDoc, Utils.getConfigStringArray("ehentai.blacklist.tags"));
|
||||
String blacklistedTag = RipUtils.checkTags(Utils.getConfigStringArray("ehentai.blacklist.tags"), getTags(albumDoc));
|
||||
if (blacklistedTag != null) {
|
||||
sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping " + url.toExternalForm() + " as it " +
|
||||
"contains the blacklisted tag \"" + blacklistedTag + "\"");
|
||||
|
@ -4,6 +4,7 @@ import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||
import com.rarchives.ripme.ripper.DownloadThreadPool;
|
||||
import com.rarchives.ripme.ui.RipStatusMessage;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
import com.rarchives.ripme.utils.RipUtils;
|
||||
import com.rarchives.ripme.utils.Utils;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
@ -82,39 +83,16 @@ public class NhentaiRipper extends AbstractHTMLRipper {
|
||||
return "nhentai" + title;
|
||||
}
|
||||
|
||||
private List<String> getTags(Document doc) {
|
||||
public List<String> getTags(Document doc) {
|
||||
List<String> tags = new ArrayList<>();
|
||||
for (Element tag : doc.select("a.tag")) {
|
||||
tags.add(tag.attr("href").replaceAll("/tag/", "").replaceAll("/", ""));
|
||||
String tagString = tag.attr("href").replaceAll("/tag/", "").replaceAll("/", "");
|
||||
LOGGER.info("Found tag: " + tagString);
|
||||
tags.add(tagString);
|
||||
}
|
||||
return tags;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks for blacklisted tags on the page. If it finds one it returns it; if not, it returns null.
|
||||
*
|
||||
* @param doc
|
||||
* @return String
|
||||
*/
|
||||
public String checkTags(Document doc, String[] blackListedTags) {
|
||||
// If the user hasn't blacklisted any tags we return null;
|
||||
if (blackListedTags == null) {
|
||||
return null;
|
||||
}
|
||||
LOGGER.info("Blacklisted tags " + blackListedTags[0]);
|
||||
List<String> tagsOnPage = getTags(doc);
|
||||
for (String tag : blackListedTags) {
|
||||
for (String pageTag : tagsOnPage) {
|
||||
// We replace all dashes in the tag with spaces because the tags we get from the site are separated using
|
||||
// dashes
|
||||
if (tag.trim().toLowerCase().equals(pageTag.replaceAll("-", " ").toLowerCase())) {
|
||||
return tag;
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
// Ex: https://nhentai.net/g/159174/
|
||||
@ -134,7 +112,7 @@ public class NhentaiRipper extends AbstractHTMLRipper {
|
||||
firstPage = Http.url(url).get();
|
||||
}
|
||||
|
||||
String blacklistedTag = checkTags(firstPage, Utils.getConfigStringArray("nhentai.blacklist.tags"));
|
||||
String blacklistedTag = RipUtils.checkTags(Utils.getConfigStringArray("nhentai.blacklist.tags"), getTags(firstPage));
|
||||
if (blacklistedTag != null) {
|
||||
sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping " + url.toExternalForm() + " as it " +
|
||||
"contains the blacklisted tag \"" + blacklistedTag + "\"");
|
||||
|
@ -2,8 +2,10 @@ package com.rarchives.ripme.tst.ripper.rippers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.util.List;
|
||||
|
||||
import com.rarchives.ripme.ripper.rippers.EHentaiRipper;
|
||||
import com.rarchives.ripme.utils.RipUtils;
|
||||
|
||||
public class EhentaiRipperTest extends RippersTest {
|
||||
public void testEHentaiAlbum() throws IOException {
|
||||
@ -15,14 +17,15 @@ public class EhentaiRipperTest extends RippersTest {
|
||||
public void testTagBlackList() throws IOException {
|
||||
URL url = new URL("https://e-hentai.org/g/1228503/1a2f455f96/");
|
||||
EHentaiRipper ripper = new EHentaiRipper(url);
|
||||
List<String> tagsOnPage = ripper.getTags(ripper.getFirstPage());
|
||||
// Test multiple blacklisted tags
|
||||
String[] tags = {"test", "one", "yuri"};
|
||||
String blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags);
|
||||
String blacklistedTag = RipUtils.checkTags(tags, tagsOnPage);
|
||||
assertEquals("yuri", blacklistedTag);
|
||||
|
||||
// test tags with spaces in them
|
||||
String[] tags2 = {"test", "one", "midnight on mars"};
|
||||
blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags2);
|
||||
blacklistedTag = RipUtils.checkTags(tags2, tagsOnPage);
|
||||
assertEquals("midnight on mars", blacklistedTag);
|
||||
}
|
||||
}
|
@ -2,8 +2,10 @@ package com.rarchives.ripme.tst.ripper.rippers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.util.List;
|
||||
|
||||
import com.rarchives.ripme.ripper.rippers.NhentaiRipper;
|
||||
import com.rarchives.ripme.utils.RipUtils;
|
||||
|
||||
public class NhentaiRipperTest extends RippersTest {
|
||||
public void testRip() throws IOException {
|
||||
@ -20,14 +22,15 @@ public class NhentaiRipperTest extends RippersTest {
|
||||
public void testTagBlackList() throws IOException {
|
||||
URL url = new URL("https://nhentai.net/g/233295/");
|
||||
NhentaiRipper ripper = new NhentaiRipper(url);
|
||||
List<String> tagsOnPage = ripper.getTags(ripper.getFirstPage());
|
||||
// Test multiple blacklisted tags
|
||||
String[] tags = {"test", "one", "blowjob"};
|
||||
String blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags);
|
||||
String blacklistedTag = RipUtils.checkTags(tags, tagsOnPage);
|
||||
assertEquals("blowjob", blacklistedTag);
|
||||
|
||||
// test tags with spaces in them
|
||||
String[] tags2 = {"test", "one", "sole female"};
|
||||
blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags2);
|
||||
assertEquals("sole female", blacklistedTag);
|
||||
String[] tags2 = {"test", "one", "sole-female"};
|
||||
blacklistedTag = RipUtils.checkTags(tags2, tagsOnPage);
|
||||
assertEquals("sole-female", blacklistedTag);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user