Merge remote-tracking branch 'upstream/master'

This commit is contained in:
digitalnoise 2018-09-13 14:43:44 -05:00
commit 9a26fdc515
5 changed files with 77 additions and 7 deletions

View File

@ -32,7 +32,7 @@ public class HbrowseRipper extends AbstractHTMLRipper {
@Override @Override
public String getGID(URL url) throws MalformedURLException { public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("http://www.hbrowse.com/(\\d+)/[a-zA-Z0-9]*"); Pattern p = Pattern.compile("https?://www.hbrowse.com/(\\d+)/[a-zA-Z0-9]*");
Matcher m = p.matcher(url.toExternalForm()); Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
return m.group(1); return m.group(1);
@ -45,7 +45,7 @@ public class HbrowseRipper extends AbstractHTMLRipper {
public Document getFirstPage() throws IOException { public Document getFirstPage() throws IOException {
// "url" is an instance field of the superclass // "url" is an instance field of the superclass
Document tempDoc = Http.url(url).get(); Document tempDoc = Http.url(url).get();
return Http.url(tempDoc.select("td[id=pageTopHome] > a[title=view thumbnails (top)]").attr("href")).get(); return Http.url("https://www.hbrowse.com" + tempDoc.select("td[id=pageTopHome] > a[title=view thumbnails (top)]").attr("href")).get();
} }
@Override @Override
@ -66,7 +66,7 @@ public class HbrowseRipper extends AbstractHTMLRipper {
List<String> result = new ArrayList<String>(); List<String> result = new ArrayList<String>();
for (Element el : doc.select("table > tbody > tr > td > a > img")) { for (Element el : doc.select("table > tbody > tr > td > a > img")) {
String imageURL = el.attr("src").replace("/zzz", ""); String imageURL = el.attr("src").replace("/zzz", "");
result.add(imageURL); result.add("https://www.hbrowse.com" + imageURL);
} }
return result; return result;
} }

View File

@ -12,6 +12,8 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import com.rarchives.ripme.ui.RipStatusMessage; import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.utils.RipUtils;
import com.rarchives.ripme.utils.Utils;
import org.json.JSONArray; import org.json.JSONArray;
import org.json.JSONObject; import org.json.JSONObject;
import org.jsoup.Connection; import org.jsoup.Connection;
@ -21,6 +23,7 @@ import org.jsoup.nodes.Document;
import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Http;
import org.jsoup.nodes.Element;
public class TsuminoRipper extends AbstractHTMLRipper { public class TsuminoRipper extends AbstractHTMLRipper {
private Map<String,String> cookies = new HashMap<>(); private Map<String,String> cookies = new HashMap<>();
@ -29,6 +32,16 @@ public class TsuminoRipper extends AbstractHTMLRipper {
super(url); super(url);
} }
/**
 * Collects every tag listed on the album page, lower-cased.
 *
 * @param doc the album's info page
 * @return the page's tags in document order, all lower-case
 */
public List<String> getTags(Document doc) {
    LOGGER.info("Getting tags");
    List<String> found = new ArrayList<>();
    for (Element anchor : doc.select("div#Tag > a")) {
        String tagText = anchor.text();
        LOGGER.info("Found tag " + tagText);
        found.add(tagText.toLowerCase());
    }
    return found;
}
private JSONArray getPageUrls() { private JSONArray getPageUrls() {
String postURL = "http://www.tsumino.com/Read/Load"; String postURL = "http://www.tsumino.com/Read/Load";
try { try {
@ -86,7 +99,14 @@ public class TsuminoRipper extends AbstractHTMLRipper {
public Document getFirstPage() throws IOException { public Document getFirstPage() throws IOException {
Connection.Response resp = Http.url(url).response(); Connection.Response resp = Http.url(url).response();
cookies.putAll(resp.cookies()); cookies.putAll(resp.cookies());
return resp.parse(); Document doc = resp.parse();
String blacklistedTag = RipUtils.checkTags(Utils.getConfigStringArray("tsumino.blacklist.tags"), getTags(doc));
if (blacklistedTag != null) {
sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping " + url.toExternalForm() + " as it " +
"contains the blacklisted tag \"" + blacklistedTag + "\"");
return null;
}
return doc;
} }
@Override @Override

View File

@ -305,4 +305,28 @@ public class RipUtils {
} }
return cookies; return cookies;
} }
/**
 * Checks the tags found on a page against the user's blacklist.
 *
 * @param blackListedTags the user's blacklisted tags; may be null if none are configured
 * @param tagsOnPage      the tags scraped from the page
 * @return the first blacklisted tag present on the page (lower-cased), or null if none match
 */
public static String checkTags(String[] blackListedTags, List<String> tagsOnPage) {
    // No blacklist configured, or no tags scraped: nothing can match.
    if (blackListedTags == null || tagsOnPage == null) {
        return null;
    }
    for (String tag : blackListedTags) {
        // Trim once per blacklist entry instead of once per comparison.
        String candidate = tag.trim();
        for (String pageTag : tagsOnPage) {
            // Case-insensitive compare (avoids allocating lower-cased copies
            // on every iteration); trim the page tag too so stray whitespace
            // from scraping cannot mask a match.
            if (candidate.equalsIgnoreCase(pageTag.trim())) {
                return tag.toLowerCase();
            }
        }
    }
    return null;
}
} }

View File

@ -6,5 +6,8 @@ import java.net.URL;
import com.rarchives.ripme.ripper.rippers.HbrowseRipper; import com.rarchives.ripme.ripper.rippers.HbrowseRipper;
public class HbrowseRipperTest extends RippersTest { public class HbrowseRipperTest extends RippersTest {
// TODO add a test public void testPahealRipper() throws IOException {
HbrowseRipper ripper = new HbrowseRipper(new URL("https://www.hbrowse.com/21013/c00001"));
testRipper(ripper);
}
} }

View File

@ -2,13 +2,36 @@ package com.rarchives.ripme.tst.ripper.rippers;
import java.io.IOException; import java.io.IOException;
import java.net.URL; import java.net.URL;
import java.util.List;
import com.rarchives.ripme.ripper.rippers.TsuminoRipper; import com.rarchives.ripme.ripper.rippers.TsuminoRipper;
import com.rarchives.ripme.utils.RipUtils;
import org.jsoup.nodes.Document;
public class TsuminoRipperTest extends RippersTest { public class TsuminoRipperTest extends RippersTest {
public void testPahealRipper() throws IOException { public void testTsuminoRipper() throws IOException {
// a photo set
TsuminoRipper ripper = new TsuminoRipper(new URL("http://www.tsumino.com/Book/Info/42882/chaldea-maid-")); TsuminoRipper ripper = new TsuminoRipper(new URL("http://www.tsumino.com/Book/Info/42882/chaldea-maid-"));
testRipper(ripper); testRipper(ripper);
} }
/**
 * Exercises RipUtils.checkTags against the live tag list of a known
 * Tsumino album. NOTE(review): requires network access to tsumino.com.
 */
public void testTagBlackList() throws IOException {
    TsuminoRipper albumRipper = new TsuminoRipper(new URL("http://www.tsumino.com/Book/Info/42882/chaldea-maid-"));
    Document page = albumRipper.getFirstPage();
    List<String> pageTags = albumRipper.getTags(page);
    // A blacklist entry matching a single-word tag (case-insensitive).
    String hit = RipUtils.checkTags(new String[]{"test", "one", "Blowjob"}, pageTags);
    assertEquals("blowjob", hit);
    // A blacklist entry containing a space.
    hit = RipUtils.checkTags(new String[]{"test", "one", "Full Color"}, pageTags);
    assertEquals("full color", hit);
    // An album with no blacklisted tags yields null.
    hit = RipUtils.checkTags(new String[]{"nothing", "one", "null"}, pageTags);
    assertNull(hit);
}
} }