From de105edffe0f9f61783ee8bea326de8af558afd3 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Thu, 17 May 2018 19:46:05 -0400 Subject: [PATCH 1/5] Added ripper for bato.to --- .../ripme/ripper/rippers/BatoRipper.java | 109 ++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/BatoRipper.java diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/BatoRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/BatoRipper.java new file mode 100644 index 00000000..a25aa16b --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/BatoRipper.java @@ -0,0 +1,109 @@ +package com.rarchives.ripme.ripper.rippers; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.json.JSONObject; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; + +public class BatoRipper extends AbstractHTMLRipper { + + public BatoRipper(URL url) throws IOException { + super(url); + } + + @Override + public String getHost() { + return "bato"; + } + + @Override + public String getDomain() { + return "bato.to"; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("https?://bato.to/chapter/([\\d]+)/?"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1); + } + throw new MalformedURLException("Expected bato.to URL format: " + + "bato.to/chapter/ID - got " + url + " instead"); + } + + @Override + public boolean canRip(URL url) { + Pattern p = Pattern.compile("https?://bato.to/series/([\\d]+)/?"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return true; + } + + p = Pattern.compile("https?://bato.to/chapter/([\\d]+)/?"); + m 
= p.matcher(url.toExternalForm()); + if (m.matches()) { + return true; + } + return false; + } + + @Override + public Document getFirstPage() throws IOException { + // "url" is an instance field of the superclass + return Http.url(url).get(); + } + + @Override + public Document getNextPage(Document doc) throws IOException { + // Find next page + String nextUrl = ""; + // We use comic-nav-next to the find the next page + Element elem = doc.select("div.nav-next > a").first(); + // If there are no more chapters to download the last chapter will link to bato.to/series/ID + if (elem == null || elem.attr("href").contains("/series/")) { + throw new IOException("No more pages"); + } + String nextPage = elem.attr("href"); + return Http.url("https://bato.to" + nextPage).get(); + } + + @Override + public List getURLsFromPage(Document doc) { + List result = new ArrayList<>(); + for (Element script : doc.select("script")) { + if (script.data().contains("var images = ")) { + String s = script.data(); + s = s.replaceAll("var seriesId = \\d+;", ""); + s = s.replaceAll("var chapterId = \\d+;", ""); + s = s.replaceAll("var pages = \\d+;", ""); + s = s.replaceAll("var page = \\d+;", ""); + s = s.replaceAll("var prevCha = null;", ""); + s = s.replaceAll("var nextCha = \\.*;", ""); + String json = s.replaceAll("var images = ", "").replaceAll(";", ""); + logger.info(s); + JSONObject images = new JSONObject(json); + for (int i = 1; i < images.length() +1; i++) { + result.add(images.getString(Integer.toString(i))); + } + + } + } + return result; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } +} From 8a27941ddee14d9c2858636c90d71670e37c8d2b Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Thu, 17 May 2018 19:47:54 -0400 Subject: [PATCH 2/5] Added test for BatoRipper --- .../ripme/tst/ripper/rippers/BatoRipperTest.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 
src/test/java/com/rarchives/ripme/tst/ripper/rippers/BatoRipperTest.java diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/BatoRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/BatoRipperTest.java new file mode 100644 index 00000000..073c304d --- /dev/null +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/BatoRipperTest.java @@ -0,0 +1,13 @@ +package com.rarchives.ripme.tst.ripper.rippers; + +import java.io.IOException; +import java.net.URL; + +import com.rarchives.ripme.ripper.rippers.BatoRipper; + +public class BatoRipperTest extends RippersTest { + public void testRip() throws IOException { + BatoRipper ripper = new BatoRipper(new URL("https://bato.to/chapter/1207152")); + testRipper(ripper); + } +} From 05b50a4513e83e7838f03a0198c72dee4abb4e30 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Thu, 17 May 2018 19:56:41 -0400 Subject: [PATCH 3/5] Added getAlbumTitle to batoRipper --- .../rarchives/ripme/ripper/rippers/BatoRipper.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/BatoRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/BatoRipper.java index a25aa16b..6578b7b1 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/BatoRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/BatoRipper.java @@ -42,6 +42,18 @@ public class BatoRipper extends AbstractHTMLRipper { "bato.to/chapter/ID - got " + url + " instead"); } + @Override + public String getAlbumTitle(URL url) throws MalformedURLException { + try { + // Attempt to use album title as GID + return getHost() + "_" + getGID(url) + "_" + getFirstPage().select("title").first().text().replaceAll(" ", "_"); + } catch (IOException e) { + // Fall back to default album naming convention + logger.info("Unable to find title at " + url); + } + return super.getAlbumTitle(url); + } + @Override public boolean canRip(URL url) { Pattern p = 
Pattern.compile("https?://bato.to/series/([\\d]+)/?"); From 9281766a71ea8a619f40b866358893e77b521392 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Thu, 17 May 2018 19:57:02 -0400 Subject: [PATCH 4/5] Extended BatoRipper tests --- .../ripme/tst/ripper/rippers/BatoRipperTest.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/BatoRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/BatoRipperTest.java index 073c304d..6bd8744a 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/BatoRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/BatoRipperTest.java @@ -10,4 +10,16 @@ public class BatoRipperTest extends RippersTest { BatoRipper ripper = new BatoRipper(new URL("https://bato.to/chapter/1207152")); testRipper(ripper); } + + public void testGetGID() throws IOException { + URL url = new URL("https://bato.to/chapter/1207152"); + BatoRipper ripper = new BatoRipper(url); + assertEquals("1207152", ripper.getGID(url)); + } + + public void testGetAlbumTitle() throws IOException { + URL url = new URL("https://bato.to/chapter/1207152"); + BatoRipper ripper = new BatoRipper(url); + assertEquals("bato_1207152_I_Messed_Up_by_Teaching_at_a_Black_Gyaru_School!_Ch.2", ripper.getAlbumTitle(url)); + } } From bb60779469a8c833645c848f6aff7338fcfefb50 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Thu, 17 May 2018 20:07:16 -0400 Subject: [PATCH 5/5] Removed getnextpage; add support for quickQueue --- .../ripme/ripper/rippers/BatoRipper.java | 44 +++++++++++++------ 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/BatoRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/BatoRipper.java index 6578b7b1..a3350e68 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/BatoRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/BatoRipper.java @@ -38,10 +38,40 @@ public class 
BatoRipper extends AbstractHTMLRipper { if (m.matches()) { return m.group(1); } + // As this is just for quick queue support, it doesn't matter what this branch returns + p = Pattern.compile("https?://bato.to/series/([\\d]+)/?"); + m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return ""; + } throw new MalformedURLException("Expected bato.to URL format: " + "bato.to/chapter/ID - got " + url + " instead"); } + @Override + public boolean hasQueueSupport() { + return true; + } + + @Override + public boolean pageContainsAlbums(URL url) { + Pattern p = Pattern.compile("https?://bato.to/series/([\\d]+)/?"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return true; + } + return false; + } + + @Override + public List getAlbumsToQueue(Document doc) { + List urlsToAddToQueue = new ArrayList<>(); + for (Element elem : doc.select("div.main > div > a")) { + urlsToAddToQueue.add("https://" + getDomain() + elem.attr("href")); + } + return urlsToAddToQueue; + } + @Override public String getAlbumTitle(URL url) throws MalformedURLException { try { @@ -76,20 +106,6 @@ public class BatoRipper extends AbstractHTMLRipper { return Http.url(url).get(); } - @Override - public Document getNextPage(Document doc) throws IOException { - // Find next page - String nextUrl = ""; - // We use comic-nav-next to the find the next page - Element elem = doc.select("div.nav-next > a").first(); - // If there are no more chapters to download the last chapter will link to bato.to/series/ID - if (elem == null || elem.attr("href").contains("/series/")) { - throw new IOException("No more pages"); - } - String nextPage = elem.attr("href"); - return Http.url("https://bato.to" + nextPage).get(); - } - @Override public List getURLsFromPage(Document doc) { List result = new ArrayList<>();