Merge pull request #941 from cyian-1756/tagBlackListRefactoring
Tag black list refactoring
This commit is contained in:
commit
e8b65ab7d8
@ -12,6 +12,7 @@ import java.util.regex.Matcher;
|
|||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import com.rarchives.ripme.ui.RipStatusMessage;
|
import com.rarchives.ripme.ui.RipStatusMessage;
|
||||||
|
import com.rarchives.ripme.utils.RipUtils;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
import org.jsoup.select.Elements;
|
import org.jsoup.select.Elements;
|
||||||
@ -126,32 +127,7 @@ public class EHentaiRipper extends AbstractHTMLRipper {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
public List<String> getTags(Document doc) {
|
||||||
* Checks for blacklisted tags on the page. If it finds one it returns it; if not, it returns null.
|
|
||||||
*
|
|
||||||
* @param doc
|
|
||||||
* @return String
|
|
||||||
*/
|
|
||||||
public String checkTags(Document doc, String[] blackListedTags) {
|
|
||||||
// If the user hasn't blacklisted any tags we return null;
|
|
||||||
if (blackListedTags == null) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
LOGGER.info("Blacklisted tags " + blackListedTags[0]);
|
|
||||||
List<String> tagsOnPage = getTags(doc);
|
|
||||||
for (String tag : blackListedTags) {
|
|
||||||
for (String pageTag : tagsOnPage) {
|
|
||||||
// Tags are compared case-insensitively after trimming the blacklisted tag; unlike the nhentai ripper, no
|
|
||||||
// dash-to-space replacement is applied to the page tags here
|
|
||||||
if (tag.trim().toLowerCase().equals(pageTag.toLowerCase())) {
|
|
||||||
return tag;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
private List<String> getTags(Document doc) {
|
|
||||||
List<String> tags = new ArrayList<>();
|
List<String> tags = new ArrayList<>();
|
||||||
LOGGER.info("Getting tags");
|
LOGGER.info("Getting tags");
|
||||||
for (Element tag : doc.select("td > div > a")) {
|
for (Element tag : doc.select("td > div > a")) {
|
||||||
@ -169,7 +145,7 @@ public class EHentaiRipper extends AbstractHTMLRipper {
|
|||||||
}
|
}
|
||||||
this.lastURL = this.url.toExternalForm();
|
this.lastURL = this.url.toExternalForm();
|
||||||
LOGGER.info("Checking blacklist");
|
LOGGER.info("Checking blacklist");
|
||||||
String blacklistedTag = checkTags(albumDoc, Utils.getConfigStringArray("ehentai.blacklist.tags"));
|
String blacklistedTag = RipUtils.checkTags(Utils.getConfigStringArray("ehentai.blacklist.tags"), getTags(albumDoc));
|
||||||
if (blacklistedTag != null) {
|
if (blacklistedTag != null) {
|
||||||
sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping " + url.toExternalForm() + " as it " +
|
sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping " + url.toExternalForm() + " as it " +
|
||||||
"contains the blacklisted tag \"" + blacklistedTag + "\"");
|
"contains the blacklisted tag \"" + blacklistedTag + "\"");
|
||||||
|
@ -4,6 +4,7 @@ import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
|||||||
import com.rarchives.ripme.ripper.DownloadThreadPool;
|
import com.rarchives.ripme.ripper.DownloadThreadPool;
|
||||||
import com.rarchives.ripme.ui.RipStatusMessage;
|
import com.rarchives.ripme.ui.RipStatusMessage;
|
||||||
import com.rarchives.ripme.utils.Http;
|
import com.rarchives.ripme.utils.Http;
|
||||||
|
import com.rarchives.ripme.utils.RipUtils;
|
||||||
import com.rarchives.ripme.utils.Utils;
|
import com.rarchives.ripme.utils.Utils;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
@ -82,39 +83,16 @@ public class NhentaiRipper extends AbstractHTMLRipper {
|
|||||||
return "nhentai" + title;
|
return "nhentai" + title;
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<String> getTags(Document doc) {
|
public List<String> getTags(Document doc) {
|
||||||
List<String> tags = new ArrayList<>();
|
List<String> tags = new ArrayList<>();
|
||||||
for (Element tag : doc.select("a.tag")) {
|
for (Element tag : doc.select("a.tag")) {
|
||||||
tags.add(tag.attr("href").replaceAll("/tag/", "").replaceAll("/", ""));
|
String tagString = tag.attr("href").replaceAll("/tag/", "").replaceAll("/", "");
|
||||||
|
LOGGER.info("Found tag: " + tagString);
|
||||||
|
tags.add(tagString);
|
||||||
}
|
}
|
||||||
return tags;
|
return tags;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Checks for blacklisted tags on the page. If it finds one it returns it; if not, it returns null.
|
|
||||||
*
|
|
||||||
* @param doc
|
|
||||||
* @return String
|
|
||||||
*/
|
|
||||||
public String checkTags(Document doc, String[] blackListedTags) {
|
|
||||||
// If the user hasn't blacklisted any tags we return null;
|
|
||||||
if (blackListedTags == null) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
LOGGER.info("Blacklisted tags " + blackListedTags[0]);
|
|
||||||
List<String> tagsOnPage = getTags(doc);
|
|
||||||
for (String tag : blackListedTags) {
|
|
||||||
for (String pageTag : tagsOnPage) {
|
|
||||||
// We replace all dashes in the tag with spaces because the tags we get from the site are separated using
|
|
||||||
// dashes
|
|
||||||
if (tag.trim().toLowerCase().equals(pageTag.replaceAll("-", " ").toLowerCase())) {
|
|
||||||
return tag;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getGID(URL url) throws MalformedURLException {
|
public String getGID(URL url) throws MalformedURLException {
|
||||||
// Ex: https://nhentai.net/g/159174/
|
// Ex: https://nhentai.net/g/159174/
|
||||||
@ -134,7 +112,7 @@ public class NhentaiRipper extends AbstractHTMLRipper {
|
|||||||
firstPage = Http.url(url).get();
|
firstPage = Http.url(url).get();
|
||||||
}
|
}
|
||||||
|
|
||||||
String blacklistedTag = checkTags(firstPage, Utils.getConfigStringArray("nhentai.blacklist.tags"));
|
String blacklistedTag = RipUtils.checkTags(Utils.getConfigStringArray("nhentai.blacklist.tags"), getTags(firstPage));
|
||||||
if (blacklistedTag != null) {
|
if (blacklistedTag != null) {
|
||||||
sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping " + url.toExternalForm() + " as it " +
|
sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping " + url.toExternalForm() + " as it " +
|
||||||
"contains the blacklisted tag \"" + blacklistedTag + "\"");
|
"contains the blacklisted tag \"" + blacklistedTag + "\"");
|
||||||
|
@ -2,8 +2,10 @@ package com.rarchives.ripme.tst.ripper.rippers;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import com.rarchives.ripme.ripper.rippers.EHentaiRipper;
|
import com.rarchives.ripme.ripper.rippers.EHentaiRipper;
|
||||||
|
import com.rarchives.ripme.utils.RipUtils;
|
||||||
|
|
||||||
public class EhentaiRipperTest extends RippersTest {
|
public class EhentaiRipperTest extends RippersTest {
|
||||||
public void testEHentaiAlbum() throws IOException {
|
public void testEHentaiAlbum() throws IOException {
|
||||||
@ -15,14 +17,15 @@ public class EhentaiRipperTest extends RippersTest {
|
|||||||
public void testTagBlackList() throws IOException {
|
public void testTagBlackList() throws IOException {
|
||||||
URL url = new URL("https://e-hentai.org/g/1228503/1a2f455f96/");
|
URL url = new URL("https://e-hentai.org/g/1228503/1a2f455f96/");
|
||||||
EHentaiRipper ripper = new EHentaiRipper(url);
|
EHentaiRipper ripper = new EHentaiRipper(url);
|
||||||
|
List<String> tagsOnPage = ripper.getTags(ripper.getFirstPage());
|
||||||
// Test multiple blacklisted tags
|
// Test multiple blacklisted tags
|
||||||
String[] tags = {"test", "one", "yuri"};
|
String[] tags = {"test", "one", "yuri"};
|
||||||
String blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags);
|
String blacklistedTag = RipUtils.checkTags(tags, tagsOnPage);
|
||||||
assertEquals("yuri", blacklistedTag);
|
assertEquals("yuri", blacklistedTag);
|
||||||
|
|
||||||
// test tags with spaces in them
|
// test tags with spaces in them
|
||||||
String[] tags2 = {"test", "one", "midnight on mars"};
|
String[] tags2 = {"test", "one", "midnight on mars"};
|
||||||
blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags2);
|
blacklistedTag = RipUtils.checkTags(tags2, tagsOnPage);
|
||||||
assertEquals("midnight on mars", blacklistedTag);
|
assertEquals("midnight on mars", blacklistedTag);
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -2,8 +2,10 @@ package com.rarchives.ripme.tst.ripper.rippers;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import com.rarchives.ripme.ripper.rippers.NhentaiRipper;
|
import com.rarchives.ripme.ripper.rippers.NhentaiRipper;
|
||||||
|
import com.rarchives.ripme.utils.RipUtils;
|
||||||
|
|
||||||
public class NhentaiRipperTest extends RippersTest {
|
public class NhentaiRipperTest extends RippersTest {
|
||||||
public void testRip() throws IOException {
|
public void testRip() throws IOException {
|
||||||
@ -20,14 +22,15 @@ public class NhentaiRipperTest extends RippersTest {
|
|||||||
public void testTagBlackList() throws IOException {
|
public void testTagBlackList() throws IOException {
|
||||||
URL url = new URL("https://nhentai.net/g/233295/");
|
URL url = new URL("https://nhentai.net/g/233295/");
|
||||||
NhentaiRipper ripper = new NhentaiRipper(url);
|
NhentaiRipper ripper = new NhentaiRipper(url);
|
||||||
|
List<String> tagsOnPage = ripper.getTags(ripper.getFirstPage());
|
||||||
// Test multiple blacklisted tags
|
// Test multiple blacklisted tags
|
||||||
String[] tags = {"test", "one", "blowjob"};
|
String[] tags = {"test", "one", "blowjob"};
|
||||||
String blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags);
|
String blacklistedTag = RipUtils.checkTags(tags, tagsOnPage);
|
||||||
assertEquals("blowjob", blacklistedTag);
|
assertEquals("blowjob", blacklistedTag);
|
||||||
|
|
||||||
// test tags with spaces in them
|
// test tags with spaces in them
|
||||||
String[] tags2 = {"test", "one", "sole female"};
|
String[] tags2 = {"test", "one", "sole-female"};
|
||||||
blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags2);
|
blacklistedTag = RipUtils.checkTags(tags2, tagsOnPage);
|
||||||
assertEquals("sole female", blacklistedTag);
|
assertEquals("sole-female", blacklistedTag);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user