Merge pull request #941 from cyian-1756/tagBlackListRefactoring

Tag black list refactoring
This commit is contained in:
cyian-1756 2018-09-14 14:13:59 -04:00 committed by GitHub
commit e8b65ab7d8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 21 additions and 61 deletions

View File

@ -12,6 +12,7 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.utils.RipUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
@ -126,32 +127,7 @@ public class EHentaiRipper extends AbstractHTMLRipper {
}
}
/**
 * Checks the tags found on a page against the user's blacklist.
 *
 * The comparison ignores case and ignores leading/trailing whitespace in each
 * blacklist entry. Page tags are compared exactly as {@code getTags(doc)} returns them.
 *
 * @param doc             page whose tags are read via {@code getTags(doc)}
 * @param blackListedTags blacklist entries; may be null or empty, in which case nothing is checked
 * @return the first blacklist entry (returned untrimmed, as passed in) that matches a tag on the
 *         page, or null if there is no match or no blacklist
 */
public String checkTags(Document doc, String[] blackListedTags) {
    // Nothing to check if the user hasn't blacklisted any tags. The length check also protects
    // the log statement below, which reads the first element of the array.
    if (blackListedTags == null || blackListedTags.length == 0) {
        return null;
    }
    LOGGER.info("Blacklisted tags " + blackListedTags[0]);
    List<String> tagsOnPage = getTags(doc);
    for (String tag : blackListedTags) {
        // Normalize the blacklist entry once rather than once per page tag.
        String wanted = tag.trim().toLowerCase();
        for (String pageTag : tagsOnPage) {
            if (wanted.equals(pageTag.toLowerCase())) {
                return tag;
            }
        }
    }
    return null;
}
private List<String> getTags(Document doc) {
public List<String> getTags(Document doc) {
List<String> tags = new ArrayList<>();
LOGGER.info("Getting tags");
for (Element tag : doc.select("td > div > a")) {
@ -169,7 +145,7 @@ public class EHentaiRipper extends AbstractHTMLRipper {
}
this.lastURL = this.url.toExternalForm();
LOGGER.info("Checking blacklist");
String blacklistedTag = checkTags(albumDoc, Utils.getConfigStringArray("ehentai.blacklist.tags"));
String blacklistedTag = RipUtils.checkTags(Utils.getConfigStringArray("ehentai.blacklist.tags"), getTags(albumDoc));
if (blacklistedTag != null) {
sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping " + url.toExternalForm() + " as it " +
"contains the blacklisted tag \"" + blacklistedTag + "\"");

View File

@ -4,6 +4,7 @@ import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.RipUtils;
import com.rarchives.ripme.utils.Utils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
@ -82,39 +83,16 @@ public class NhentaiRipper extends AbstractHTMLRipper {
return "nhentai" + title;
}
/**
 * Collects the tags linked from a page.
 *
 * For every element matched by the selector "a.tag", the tag is taken from the element's
 * href attribute with "/tag/" and any remaining "/" characters removed.
 *
 * NOTE(review): this span shows two method signatures (private and public) and two forms of the
 * add statement back to back; presumably only one of each is live in the real file — confirm
 * against the actual source before relying on it.
 *
 * @param doc page to read the tag links from
 * @return list of tag strings in the order the links were matched
 */
private List<String> getTags(Document doc) {
public List<String> getTags(Document doc) {
List<String> tags = new ArrayList<>();
for (Element tag : doc.select("a.tag")) {
tags.add(tag.attr("href").replaceAll("/tag/", "").replaceAll("/", ""));
// Strip the "/tag/" prefix and any other slashes so only the tag name remains
String tagString = tag.attr("href").replaceAll("/tag/", "").replaceAll("/", "");
LOGGER.info("Found tag: " + tagString);
tags.add(tagString);
}
return tags;
}
/**
 * Checks the tags found on a page against the user's blacklist.
 *
 * The comparison ignores case and ignores leading/trailing whitespace in each
 * blacklist entry. Dashes in the page tags are replaced with spaces before comparing, so a
 * blacklist entry written with spaces matches a dash-separated tag from the page.
 *
 * @param doc             page whose tags are read via {@code getTags(doc)}
 * @param blackListedTags blacklist entries; may be null or empty, in which case nothing is checked
 * @return the first blacklist entry (returned untrimmed, as passed in) that matches a tag on the
 *         page, or null if there is no match or no blacklist
 */
public String checkTags(Document doc, String[] blackListedTags) {
    // Nothing to check if the user hasn't blacklisted any tags. The length check also protects
    // the log statement below, which reads the first element of the array.
    if (blackListedTags == null || blackListedTags.length == 0) {
        return null;
    }
    LOGGER.info("Blacklisted tags " + blackListedTags[0]);
    List<String> tagsOnPage = getTags(doc);
    for (String tag : blackListedTags) {
        // Normalize the blacklist entry once rather than once per page tag.
        String wanted = tag.trim().toLowerCase();
        for (String pageTag : tagsOnPage) {
            // Page tags are separated using dashes; turn them into spaces before comparing.
            if (wanted.equals(pageTag.replaceAll("-", " ").toLowerCase())) {
                return tag;
            }
        }
    }
    return null;
}
@Override
public String getGID(URL url) throws MalformedURLException {
// Ex: https://nhentai.net/g/159174/
@ -134,7 +112,7 @@ public class NhentaiRipper extends AbstractHTMLRipper {
firstPage = Http.url(url).get();
}
String blacklistedTag = checkTags(firstPage, Utils.getConfigStringArray("nhentai.blacklist.tags"));
String blacklistedTag = RipUtils.checkTags(Utils.getConfigStringArray("nhentai.blacklist.tags"), getTags(firstPage));
if (blacklistedTag != null) {
sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping " + url.toExternalForm() + " as it " +
"contains the blacklisted tag \"" + blacklistedTag + "\"");

View File

@ -2,8 +2,10 @@ package com.rarchives.ripme.tst.ripper.rippers;
import java.io.IOException;
import java.net.URL;
import java.util.List;
import com.rarchives.ripme.ripper.rippers.EHentaiRipper;
import com.rarchives.ripme.utils.RipUtils;
public class EhentaiRipperTest extends RippersTest {
public void testEHentaiAlbum() throws IOException {
@ -15,14 +17,15 @@ public class EhentaiRipperTest extends RippersTest {
/**
 * Verifies blacklist matching against the tags of a fixed e-hentai gallery.
 *
 * Each check passes an array of candidate tags and expects the one entry that is asserted
 * below to be returned as the blacklisted tag.
 *
 * NOTE(review): this span shows both ripper.checkTags(...) and RipUtils.checkTags(...) calls
 * assigning the same variable; presumably only one form is live in the real file — confirm.
 */
public void testTagBlackList() throws IOException {
URL url = new URL("https://e-hentai.org/g/1228503/1a2f455f96/");
EHentaiRipper ripper = new EHentaiRipper(url);
// Tags are read once from the ripper's first page and reused by the RipUtils checks below
List<String> tagsOnPage = ripper.getTags(ripper.getFirstPage());
// Test multiple blacklisted tags
String[] tags = {"test", "one", "yuri"};
String blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags);
String blacklistedTag = RipUtils.checkTags(tags, tagsOnPage);
assertEquals("yuri", blacklistedTag);
// Test tags with spaces in them
String[] tags2 = {"test", "one", "midnight on mars"};
blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags2);
blacklistedTag = RipUtils.checkTags(tags2, tagsOnPage);
assertEquals("midnight on mars", blacklistedTag);
}
}

View File

@ -2,8 +2,10 @@ package com.rarchives.ripme.tst.ripper.rippers;
import java.io.IOException;
import java.net.URL;
import java.util.List;
import com.rarchives.ripme.ripper.rippers.NhentaiRipper;
import com.rarchives.ripme.utils.RipUtils;
public class NhentaiRipperTest extends RippersTest {
public void testRip() throws IOException {
@ -20,14 +22,15 @@ public class NhentaiRipperTest extends RippersTest {
/**
 * Verifies blacklist matching against the tags of a fixed nhentai gallery.
 *
 * Each check passes an array of candidate tags and expects the one entry that is asserted
 * below to be returned as the blacklisted tag.
 *
 * NOTE(review): this span shows two variants of the multi-word check ("sole female" via
 * ripper.checkTags and "sole-female" via RipUtils.checkTags), including two declarations of
 * tags2; presumably only one variant is live in the real file — confirm.
 */
public void testTagBlackList() throws IOException {
URL url = new URL("https://nhentai.net/g/233295/");
NhentaiRipper ripper = new NhentaiRipper(url);
// Tags are read once from the ripper's first page and reused by the RipUtils checks below
List<String> tagsOnPage = ripper.getTags(ripper.getFirstPage());
// Test multiple blacklisted tags
String[] tags = {"test", "one", "blowjob"};
String blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags);
String blacklistedTag = RipUtils.checkTags(tags, tagsOnPage);
assertEquals("blowjob", blacklistedTag);
// Test multi-word tags (written with a space in one variant, with a dash in the other)
String[] tags2 = {"test", "one", "sole female"};
blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags2);
assertEquals("sole female", blacklistedTag);
String[] tags2 = {"test", "one", "sole-female"};
blacklistedTag = RipUtils.checkTags(tags2, tagsOnPage);
assertEquals("sole-female", blacklistedTag);
}
}