Merge pull request #938 from cyian-1756/TsuminoTagBlackListing

Added Tsumino tag blacklisting
This commit is contained in:
cyian-1756 2018-09-13 08:56:13 -04:00 committed by GitHub
commit 2ae84442be
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 70 additions and 3 deletions

View File

@ -12,6 +12,8 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.utils.RipUtils;
import com.rarchives.ripme.utils.Utils;
import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.Connection;
@ -21,6 +23,7 @@ import org.jsoup.nodes.Document;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import org.jsoup.nodes.Element;
public class TsuminoRipper extends AbstractHTMLRipper {
private Map<String,String> cookies = new HashMap<>();
@ -29,6 +32,16 @@ public class TsuminoRipper extends AbstractHTMLRipper {
super(url);
}
/**
 * Collects the tags shown on an album page.
 *
 * Every anchor matched by the selector {@code div#Tag > a} contributes its text, lowercased,
 * to the result. Each tag found is also written to the log at info level.
 *
 * @param doc the parsed album page
 * @return the lowercased tag texts, in document order; empty if no tag anchors are found
 */
public List<String> getTags(Document doc) {
    LOGGER.info("Getting tags");
    List<String> lowerCasedTags = new ArrayList<>();
    for (Element anchor : doc.select("div#Tag > a")) {
        String tagText = anchor.text();
        LOGGER.info("Found tag " + tagText);
        lowerCasedTags.add(tagText.toLowerCase());
    }
    return lowerCasedTags;
}
private JSONArray getPageUrls() {
String postURL = "http://www.tsumino.com/Read/Load";
try {
@ -86,7 +99,14 @@ public class TsuminoRipper extends AbstractHTMLRipper {
/**
 * Fetches the album page, remembers its cookies and applies the tag blacklist.
 *
 * The cookies from the response are merged into {@code cookies}. The tags on the page are then
 * compared with the entries of the {@code tsumino.blacklist.tags} config value; when one of
 * them matches, a DOWNLOAD_WARN status update is sent and the album is skipped.
 *
 * @return the parsed page, or {@code null} when the page carries a blacklisted tag
 * @throws IOException if the page cannot be fetched or parsed
 */
public Document getFirstPage() throws IOException {
    Connection.Response resp = Http.url(url).response();
    cookies.putAll(resp.cookies());
    // The page must be parsed before returning so its tags can be checked against the blacklist;
    // an unconditional early return here would make the check below unreachable.
    Document doc = resp.parse();
    String blacklistedTag = RipUtils.checkTags(Utils.getConfigStringArray("tsumino.blacklist.tags"), getTags(doc));
    if (blacklistedTag != null) {
        sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping " + url.toExternalForm() + " as it " +
                "contains the blacklisted tag \"" + blacklistedTag + "\"");
        return null;
    }
    return doc;
}
@Override

View File

@ -305,4 +305,28 @@ public class RipUtils {
}
return cookies;
}
/**
 * Checks the tags found on a page against the user's blacklist.
 *
 * Blacklist entries are trimmed and lowercased before comparison; page tags are lowercased.
 * Null entries in either collection are ignored.
 *
 * @param blackListedTags the blacklisted tags; may be null when the user has not blacklisted any
 * @param tagsOnPage the tags on the page; may be null
 * @return the first blacklisted tag that appears on the page, trimmed and lowercased,
 *         or null if there is no match
 */
public static String checkTags(String[] blackListedTags, List<String> tagsOnPage) {
    // With no blacklist or no page tags there is nothing that could match.
    if (blackListedTags == null || tagsOnPage == null) {
        return null;
    }
    for (String tag : blackListedTags) {
        if (tag == null) {
            continue;
        }
        // Normalise once per blacklist entry, and return this same normalised form so the
        // reported tag is exactly the value that was matched (no stray whitespace).
        String normalizedTag = tag.trim().toLowerCase();
        for (String pageTag : tagsOnPage) {
            if (pageTag != null && normalizedTag.equals(pageTag.toLowerCase())) {
                return normalizedTag;
            }
        }
    }
    return null;
}
}

View File

@ -2,13 +2,36 @@ package com.rarchives.ripme.tst.ripper.rippers;
import java.io.IOException;
import java.net.URL;
import java.util.List;
import com.rarchives.ripme.ripper.rippers.TsuminoRipper;
import com.rarchives.ripme.utils.RipUtils;
import org.jsoup.nodes.Document;
public class TsuminoRipperTest extends RippersTest {
public void testPahealRipper() throws IOException {
// a photo set
public void testTsuminoRipper() throws IOException {
TsuminoRipper ripper = new TsuminoRipper(new URL("http://www.tsumino.com/Book/Info/42882/chaldea-maid-"));
testRipper(ripper);
}
public void testTagBlackList() throws IOException {
TsuminoRipper ripper = new TsuminoRipper(new URL("http://www.tsumino.com/Book/Info/42882/chaldea-maid-"));
Document doc = ripper.getFirstPage();
List<String> tagsOnPage = ripper.getTags(doc);
String[] tags1 = {"test", "one", "Blowjob"};
String blacklistedTag = RipUtils.checkTags(tags1, tagsOnPage);
assertEquals("blowjob", blacklistedTag);
// Test a tag with spaces
String[] tags2 = {"test", "one", "Full Color"};
blacklistedTag = RipUtils.checkTags(tags2, tagsOnPage);
assertEquals("full color", blacklistedTag);
// Test a album with no blacklisted tags
String[] tags3 = {"nothing", "one", "null"};
blacklistedTag = RipUtils.checkTags(tags3, tagsOnPage);
assertNull(blacklistedTag);
}
}