Started work on fixing the DeviantArt ripper

This commit is contained in:
cyian-1756 2018-06-28 21:04:05 -04:00
parent f8652ceaf7
commit e83032906d

View File

@ -66,7 +66,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
u += "gallery/?"; u += "gallery/?";
} }
Pattern p = Pattern.compile("^https?://([a-zA-Z0-9\\-]+)\\.deviantart\\.com/favou?rites/([0-9]+)/*?$"); Pattern p = Pattern.compile("^https?://www\\.deviantart\\.com/([a-zA-Z0-9\\-]+)/favou?rites/([0-9]+)/*?$");
Matcher m = p.matcher(url.toExternalForm()); Matcher m = p.matcher(url.toExternalForm());
if (!m.matches()) { if (!m.matches()) {
String subdir = "/"; String subdir = "/";
@ -80,7 +80,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
@Override @Override
public String getGID(URL url) throws MalformedURLException { public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://([a-zA-Z0-9\\-]+)\\.deviantart\\.com(/gallery)?/?(\\?.*)?$"); Pattern p = Pattern.compile("^https?://www\\.deviantart\\.com/([a-zA-Z0-9\\-]+)(/gallery)?/?(\\?.*)?$");
Matcher m = p.matcher(url.toExternalForm()); Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
// Root gallery // Root gallery
@ -91,24 +91,24 @@ public class DeviantartRipper extends AbstractHTMLRipper {
return m.group(1); return m.group(1);
} }
} }
p = Pattern.compile("^https?://([a-zA-Z0-9\\-]+)\\.deviantart\\.com/gallery/([0-9]+).*$"); p = Pattern.compile("^https?://www\\.deviantart\\.com/([a-zA-Z0-9\\-]+)/gallery/([0-9]+).*$");
m = p.matcher(url.toExternalForm()); m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
// Subgallery // Subgallery
return m.group(1) + "_" + m.group(2); return m.group(1) + "_" + m.group(2);
} }
p = Pattern.compile("^https?://([a-zA-Z0-9\\-]+)\\.deviantart\\.com/favou?rites/([0-9]+)/.*?$"); p = Pattern.compile("^https?://www\\.deviantart\\.com/([a-zA-Z0-9\\-]+)/favou?rites/([0-9]+)/.*?$");
m = p.matcher(url.toExternalForm()); m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
return m.group(1) + "_faves_" + m.group(2); return m.group(1) + "_faves_" + m.group(2);
} }
p = Pattern.compile("^https?://([a-zA-Z0-9\\-]+)\\.deviantart\\.com/favou?rites/?$"); p = Pattern.compile("^https?://www\\.deviantart\\.com/([a-zA-Z0-9\\-]+)/favou?rites/?$");
m = p.matcher(url.toExternalForm()); m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
// Subgallery // Subgallery
return m.group(1) + "_faves"; return m.group(1) + "_faves";
} }
throw new MalformedURLException("Expected URL format: http://username.deviantart.com/[/gallery/#####], got: " + url); throw new MalformedURLException("Expected URL format: http://www.deviantart.com/username[/gallery/#####], got: " + url);
} }
/** /**
@ -238,26 +238,19 @@ public class DeviantartRipper extends AbstractHTMLRipper {
if (isThisATest()) { if (isThisATest()) {
return null; return null;
} }
Elements nextButtons = page.select("link[rel=\"next\"]"); String baseURL = "https://www.deviantart.com/dapi/v1/gallery/";
if (nextButtons.isEmpty()) { String id = page.select("div[gmi-name=gallery]").first().attr("gmi-itemid");
if (page.select("link[rel=\"prev\"]").isEmpty()) { baseURL = baseURL + id;
throw new IOException("No next page found"); String requestID = getRequestID(page);
} else { Document d = Http.url(baseURL).data("idd", requestID).post();
throw new IOException("Hit end of pages"); LOGGER.info(d.html());
return d;
} }
}
Element a = nextButtons.first(); private String getRequestID(Document doc) {
String nextPage = a.attr("href"); Pattern p = Pattern.compile("requestid\":\"([a-zA-Z0-9]+)\"");
if (nextPage.startsWith("/")) { Matcher m = p.matcher(doc.html());
nextPage = "http://" + this.url.getHost() + nextPage; return "590m257da2ea3eea661e272dde2948081c4d";
}
if (!sleep(PAGE_SLEEP_TIME)) {
throw new IOException("Interrupted while waiting to load next page: " + nextPage);
}
LOGGER.info("Found next page: " + nextPage);
return Http.url(nextPage)
.cookies(cookies)
.get();
} }
@Override @Override