From 0594e55d1fdd79af5e3a9f5776d2bbaee6e681f6 Mon Sep 17 00:00:00 2001
From: cyian-1756
Date: Sat, 19 May 2018 10:42:50 -0400
Subject: [PATCH] SankakuComplexRipper can now download from different subdomains

The base URL used to fetch each post page is now derived from the album
URL's subdomain instead of being hard-coded to chan.sankakucomplex.com.

getSubDomain() returns the subdomain together with its trailing dot
(e.g. "chan."), or an empty string when the URL has no subdomain or is
not a tag URL, so the rebuilt site URL is always well formed.
---
Review notes (ignored by git am):
 - getSubDomain() no longer passes a null capture group to
   URLDecoder.decode() and no longer returns null; previously a URL
   without a subdomain threw a NullPointerException and a non-matching
   URL produced "https://nullsankakucomplex.com".
 - The subdomain pattern is compiled once instead of once per thumbnail.
 - Indentation of this patch was reconstructed; if context lines do not
   match the target file, apply with `git apply --ignore-whitespace`.
 - The post-image blob id on the index line is carried over from the
   original patch and was not recomputed.

 .../ripper/rippers/SankakuComplexRipper.java  | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/SankakuComplexRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/SankakuComplexRipper.java
index c6440bb8..d83d5930 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/SankakuComplexRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/SankakuComplexRipper.java
@@ -43,7 +43,7 @@ public class SankakuComplexRipper extends AbstractHTMLRipper {
         Matcher m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             try {
-                return URLDecoder.decode(m.group(2), "UTF-8");
+                return URLDecoder.decode(m.group(1) + "_" + m.group(2), "UTF-8");
             } catch (UnsupportedEncodingException e) {
                 throw new MalformedURLException("Cannot decode tag name '" + m.group(1) + "'");
             }
@@ -53,6 +53,26 @@ public class SankakuComplexRipper extends AbstractHTMLRipper {
                         url + "instead");
     }
 
+    // Compiled once; group 1 captures the optional subdomain including its trailing dot.
+    private static final Pattern SUBDOMAIN_PATTERN =
+            Pattern.compile("^https?://([a-zA-Z0-9]+\\.)?sankakucomplex\\.com/.*tags=([^&]+).*$");
+
+    /**
+     * Returns the subdomain prefix of a sankakucomplex.com tag URL.
+     *
+     * @param url the URL to inspect
+     * @return the subdomain including its trailing dot (for example "chan."), or an
+     *         empty string if the URL has no subdomain or does not match the tag URL pattern
+     */
+    public String getSubDomain(URL url){
+        Matcher m = SUBDOMAIN_PATTERN.matcher(url.toExternalForm());
+        // Group 1 is optional in the pattern, so it is null when the URL has no subdomain.
+        if (m.matches() && m.group(1) != null) {
+            return m.group(1);
+        }
+        return "";
+    }
+
     @Override
     public Document getFirstPage() throws IOException {
         if (albumDoc == null) {
@@ -71,9 +91,11 @@ public class SankakuComplexRipper extends AbstractHTMLRipper {
         for (Element thumbSpan : doc.select("div.content > div > span.thumb > a")) {
             String postLink = thumbSpan.attr("href");
             try {
+                String subDomain = getSubDomain(url);
+                String siteURL = "https://" + subDomain + "sankakucomplex.com";
                 // Get the page the full sized image is on
-                Document subPage = Http.url("https://chan.sankakucomplex.com" + postLink).get();
-                logger.info("Checking page " + "https://chan.sankakucomplex.com" + postLink);
+                Document subPage = Http.url(siteURL + postLink).get();
+                logger.info("Checking page " + siteURL + postLink);
                 imageURLs.add("https:" + subPage.select("div[id=stats] > ul > li > a[id=highres]").attr("href"));
             } catch (IOException e) {
                 logger.warn("Error while loading page " + postLink, e);