From e5096736bfa99217bea948dc8c06486ac8409a38 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Fri, 9 Mar 2018 09:25:50 -0500 Subject: [PATCH] Added tsuminoRipper --- .../ripme/ripper/rippers/TsuminoRipper.java | 108 ++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java new file mode 100644 index 00000000..ff6e8829 --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java @@ -0,0 +1,108 @@ +package com.rarchives.ripme.ripper.rippers; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLEncoder; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.json.JSONArray; +import org.json.JSONObject; +import org.jsoup.Connection; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; + +public class TsuminoRipper extends AbstractHTMLRipper { + private Map cookies = new HashMap<>(); + + public TsuminoRipper(URL url) throws IOException { + super(url); + } + + private JSONArray getPageUrls() { + String postURL = "http://www.tsumino.com/Read/Load"; + try { + // This sessionId will expire and need to be replaced + cookies.put("ASP.NET_SessionId","c4rbzccf0dvy3e0cloolmlkq"); + logger.info(cookies); + Document doc = Jsoup.connect(postURL).data("q", getAlbumID()).userAgent(USER_AGENT).cookies(cookies).referrer("http://www.tsumino.com/Read/View/" + getAlbumID()).post(); + String jsonInfo = doc.html().replaceAll("","").replaceAll("", "").replaceAll("", "").replaceAll("", "") + .replaceAll("", "").replaceAll("\n", ""); + logger.info(jsonInfo); + JSONObject json = new JSONObject(jsonInfo); + logger.info(json.getJSONArray("reader_page_urls")); + return json.getJSONArray("reader_page_urls"); + } catch (IOException e) { + logger.info(e); + return null; + } + } + + @Override + public String getHost() { + return "tsumino"; + } + + @Override + public String getDomain() { + return "tsumino.com"; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("https?://www.tsumino.com/Book/Info/([0-9]+)/([a-zA-Z0-9_-]*)"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1) + "_" + m.group(2); + } + throw new MalformedURLException("Expected tsumino URL format: " + + "tsumino.com/Book/Info/ID/TITLE - got " + url + " instead"); + } + + private String getAlbumID() { + Pattern p = Pattern.compile("https?://www.tsumino.com/Book/Info/([0-9]+)/\\S*"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1); + } + return null; + } + + @Override + public Document getFirstPage() throws IOException { + Connection.Response resp = Http.url(url).response(); + cookies.putAll(resp.cookies()); + // We need to perform a get on http://www.tsumino.com/Read/View/albumID/1 or else the + //www.tsumino.com/Read/Load endpoint 404s + resp = Http.url("http://www.tsumino.com/Book/Info/" + getAlbumID()).response(); + cookies.putAll(resp.cookies()); + return resp.parse(); + } + + @Override + public List getURLsFromPage(Document doc) { + JSONArray imageIds = getPageUrls(); + List result = new ArrayList<>(); + for (int i = 0; i < imageIds.length(); i++) { + result.add("http://www.tsumino.com/Image/Object?name=" + URLEncoder.encode(imageIds.getString(i))); + } + + return result; + } + + @Override + public void downloadURL(URL url, int index) { + sleep(1000); + addURLToDownload(url, getPrefix(index)); + } +}