Merge remote-tracking branch 'upstream/master'
This commit is contained in:
commit
9a26fdc515
@ -32,7 +32,7 @@ public class HbrowseRipper extends AbstractHTMLRipper {
|
||||
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
Pattern p = Pattern.compile("http://www.hbrowse.com/(\\d+)/[a-zA-Z0-9]*");
|
||||
Pattern p = Pattern.compile("https?://www.hbrowse.com/(\\d+)/[a-zA-Z0-9]*");
|
||||
Matcher m = p.matcher(url.toExternalForm());
|
||||
if (m.matches()) {
|
||||
return m.group(1);
|
||||
@ -45,7 +45,7 @@ public class HbrowseRipper extends AbstractHTMLRipper {
|
||||
public Document getFirstPage() throws IOException {
|
||||
// "url" is an instance field of the superclass
|
||||
Document tempDoc = Http.url(url).get();
|
||||
return Http.url(tempDoc.select("td[id=pageTopHome] > a[title=view thumbnails (top)]").attr("href")).get();
|
||||
return Http.url("https://www.hbrowse.com" + tempDoc.select("td[id=pageTopHome] > a[title=view thumbnails (top)]").attr("href")).get();
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -66,7 +66,7 @@ public class HbrowseRipper extends AbstractHTMLRipper {
|
||||
List<String> result = new ArrayList<String>();
|
||||
for (Element el : doc.select("table > tbody > tr > td > a > img")) {
|
||||
String imageURL = el.attr("src").replace("/zzz", "");
|
||||
result.add(imageURL);
|
||||
result.add("https://www.hbrowse.com" + imageURL);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
@ -12,6 +12,8 @@ import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.rarchives.ripme.ui.RipStatusMessage;
|
||||
import com.rarchives.ripme.utils.RipUtils;
|
||||
import com.rarchives.ripme.utils.Utils;
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
import org.jsoup.Connection;
|
||||
@ -21,6 +23,7 @@ import org.jsoup.nodes.Document;
|
||||
|
||||
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
import org.jsoup.nodes.Element;
|
||||
|
||||
public class TsuminoRipper extends AbstractHTMLRipper {
|
||||
private Map<String,String> cookies = new HashMap<>();
|
||||
@ -29,6 +32,16 @@ public class TsuminoRipper extends AbstractHTMLRipper {
|
||||
super(url);
|
||||
}
|
||||
|
||||
public List<String> getTags(Document doc) {
|
||||
List<String> tags = new ArrayList<>();
|
||||
LOGGER.info("Getting tags");
|
||||
for (Element tag : doc.select("div#Tag > a")) {
|
||||
LOGGER.info("Found tag " + tag.text());
|
||||
tags.add(tag.text().toLowerCase());
|
||||
}
|
||||
return tags;
|
||||
}
|
||||
|
||||
private JSONArray getPageUrls() {
|
||||
String postURL = "http://www.tsumino.com/Read/Load";
|
||||
try {
|
||||
@ -86,7 +99,14 @@ public class TsuminoRipper extends AbstractHTMLRipper {
|
||||
public Document getFirstPage() throws IOException {
|
||||
Connection.Response resp = Http.url(url).response();
|
||||
cookies.putAll(resp.cookies());
|
||||
return resp.parse();
|
||||
Document doc = resp.parse();
|
||||
String blacklistedTag = RipUtils.checkTags(Utils.getConfigStringArray("tsumino.blacklist.tags"), getTags(doc));
|
||||
if (blacklistedTag != null) {
|
||||
sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping " + url.toExternalForm() + " as it " +
|
||||
"contains the blacklisted tag \"" + blacklistedTag + "\"");
|
||||
return null;
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -305,4 +305,28 @@ public class RipUtils {
|
||||
}
|
||||
return cookies;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks for blacklisted tags on page. If it finds one it returns it, if not it return null
|
||||
*
|
||||
* @param blackListedTags a string array of the blacklisted tags
|
||||
* @param tagsOnPage the tags on the page
|
||||
* @return String
|
||||
*/
|
||||
public static String checkTags(String[] blackListedTags, List<String> tagsOnPage) {
|
||||
// If the user hasn't blacklisted any tags we return null;
|
||||
if (blackListedTags == null) {
|
||||
return null;
|
||||
}
|
||||
for (String tag : blackListedTags) {
|
||||
for (String pageTag : tagsOnPage) {
|
||||
// We replace all dashes in the tag with spaces because the tags we get from the site are separated using
|
||||
// dashes
|
||||
if (tag.trim().toLowerCase().equals(pageTag.toLowerCase())) {
|
||||
return tag.toLowerCase();
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
@ -6,5 +6,8 @@ import java.net.URL;
|
||||
import com.rarchives.ripme.ripper.rippers.HbrowseRipper;
|
||||
|
||||
public class HbrowseRipperTest extends RippersTest {
|
||||
// TODO add a test
|
||||
public void testPahealRipper() throws IOException {
|
||||
HbrowseRipper ripper = new HbrowseRipper(new URL("https://www.hbrowse.com/21013/c00001"));
|
||||
testRipper(ripper);
|
||||
}
|
||||
}
|
@ -2,13 +2,36 @@ package com.rarchives.ripme.tst.ripper.rippers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.util.List;
|
||||
|
||||
import com.rarchives.ripme.ripper.rippers.TsuminoRipper;
|
||||
import com.rarchives.ripme.utils.RipUtils;
|
||||
import org.jsoup.nodes.Document;
|
||||
|
||||
|
||||
public class TsuminoRipperTest extends RippersTest {
|
||||
public void testPahealRipper() throws IOException {
|
||||
// a photo set
|
||||
public void testTsuminoRipper() throws IOException {
|
||||
TsuminoRipper ripper = new TsuminoRipper(new URL("http://www.tsumino.com/Book/Info/42882/chaldea-maid-"));
|
||||
testRipper(ripper);
|
||||
}
|
||||
|
||||
public void testTagBlackList() throws IOException {
|
||||
TsuminoRipper ripper = new TsuminoRipper(new URL("http://www.tsumino.com/Book/Info/42882/chaldea-maid-"));
|
||||
Document doc = ripper.getFirstPage();
|
||||
List<String> tagsOnPage = ripper.getTags(doc);
|
||||
String[] tags1 = {"test", "one", "Blowjob"};
|
||||
String blacklistedTag = RipUtils.checkTags(tags1, tagsOnPage);
|
||||
assertEquals("blowjob", blacklistedTag);
|
||||
|
||||
// Test a tag with spaces
|
||||
String[] tags2 = {"test", "one", "Full Color"};
|
||||
blacklistedTag = RipUtils.checkTags(tags2, tagsOnPage);
|
||||
assertEquals("full color", blacklistedTag);
|
||||
|
||||
// Test a album with no blacklisted tags
|
||||
String[] tags3 = {"nothing", "one", "null"};
|
||||
blacklistedTag = RipUtils.checkTags(tags3, tagsOnPage);
|
||||
assertNull(blacklistedTag);
|
||||
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user