From e6ea9a8fdfaa2ea3faf37f1ce1a48e0949ce9859 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Fri, 17 Aug 2018 09:25:19 -0400 Subject: [PATCH 1/2] Added ripper for Gelbooru.com --- .../ripme/ripper/rippers/GelbooruRipper.java | 96 +++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/GelbooruRipper.java diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/GelbooruRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/GelbooruRipper.java new file mode 100644 index 00000000..9d3b483a --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/GelbooruRipper.java @@ -0,0 +1,96 @@ +package com.rarchives.ripme.ripper.rippers; + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; +import com.rarchives.ripme.utils.Utils; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.log4j.Logger; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + +public class GelbooruRipper extends AbstractHTMLRipper { + private static final Logger logger = Logger.getLogger(XbooruRipper.class); + + private static Pattern gidPattern = null; + + public GelbooruRipper(URL url) throws IOException { + super(url); + } + + @Override + public String getDomain() { + return "gelbooru.com"; + } + + @Override + public String getHost() { + return "gelbooru"; + } + + private String getPage(int num) throws MalformedURLException { + return "https://gelbooru.com/index.php?page=dapi&s=post&q=index&pid=" + num + "&tags=" + getTerm(url); + } + + @Override + public Document getFirstPage() throws IOException { + return Http.url(getPage(0)).get(); + } + + @Override + public Document getNextPage(Document doc) throws IOException { + int 
offset = Integer.parseInt(doc.getElementsByTag("posts").first().attr("offset")); + int num = Integer.parseInt(doc.getElementsByTag("posts").first().attr("count")); + + if (offset + 100 > num) { + return null; + } + + return Http.url(getPage(offset / 100 + 1)).get(); + } + + @Override + public List getURLsFromPage(Document page) { + List res = new ArrayList<>(100); + for (Element e : page.getElementsByTag("post")) { + res.add(e.absUrl("file_url") + "#" + e.attr("id")); + } + return res; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, Utils.getConfigBoolean("download.save_order", true) ? url.getRef() + "-" : ""); + } + + private String getTerm(URL url) throws MalformedURLException { + if (gidPattern == null) { + gidPattern = Pattern.compile("^https?://(www\\.)?gelbooru\\.com/(index.php)?.*([?&]tags=([a-zA-Z0-9$_.+!*'(),%-]+))(&|(#.*)?$)"); + } + + Matcher m = gidPattern.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(4); + } + + throw new MalformedURLException("Expected gelbooru.com URL format: gelbooru.com/index.php?tags=searchterm - got " + url + " instead"); + } + + @Override + public String getGID(URL url) throws MalformedURLException { + try { + return Utils.filesystemSafe(new URI(getTerm(url)).getPath()); + } catch (URISyntaxException ex) { + logger.error(ex); + } + + throw new MalformedURLException("Expected gelbooru.com URL format: xbooru.com/index.php?tags=searchterm - got " + url + " instead"); + } +} \ No newline at end of file From fd3b2c55987cf5cb0faf3901a727c02f39ef4880 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Fri, 17 Aug 2018 09:52:51 -0400 Subject: [PATCH 2/2] Added generic ripper for Booru sites --- .../{XbooruRipper.java => BooruRipper.java} | 35 ++++--- .../ripme/ripper/rippers/GelbooruRipper.java | 96 ------------------- ...ruRipperTest.java => BooruRipperTest.java} | 8 +- 3 files changed, 27 insertions(+), 112 deletions(-) rename 
src/main/java/com/rarchives/ripme/ripper/rippers/{XbooruRipper.java => BooruRipper.java} (67%) delete mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/GelbooruRipper.java rename src/test/java/com/rarchives/ripme/tst/ripper/rippers/{XbooruRipperTest.java => BooruRipperTest.java} (57%) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/XbooruRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/BooruRipper.java similarity index 67% rename from src/main/java/com/rarchives/ripme/ripper/rippers/XbooruRipper.java rename to src/main/java/com/rarchives/ripme/ripper/rippers/BooruRipper.java index 6b697f70..7d6b17a6 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/XbooruRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/BooruRipper.java @@ -16,27 +16,38 @@ import org.apache.log4j.Logger; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -public class XbooruRipper extends AbstractHTMLRipper { - private static final Logger logger = Logger.getLogger(XbooruRipper.class); +public class BooruRipper extends AbstractHTMLRipper { + private static final Logger logger = Logger.getLogger(BooruRipper.class); private static Pattern gidPattern = null; - public XbooruRipper(URL url) throws IOException { + public BooruRipper(URL url) throws IOException { super(url); } @Override - public String getDomain() { - return "xbooru.com"; + public boolean canRip(URL url) { + if (url.toExternalForm().contains("xbooru") || url.toExternalForm().contains("gelbooru")) { + return true; + } + return false; + } @Override public String getHost() { - return "xbooru"; + logger.info(url.toExternalForm().split("/")[2]); + return url.toExternalForm().split("/")[2].split("\\.")[0]; + } + + @Override + public String getDomain() { + return url.toExternalForm().split("/")[2]; } private String getPage(int num) throws MalformedURLException { - return "http://xbooru.com/index.php?page=dapi&s=post&q=index&pid=" + num + "&tags=" + getTerm(url); + 
return "http://" + getHost() + ".com/index.php?page=dapi&s=post&q=index&pid=" + num + "&tags=" + getTerm(url); + } @Override @@ -72,7 +83,7 @@ public class XbooruRipper extends AbstractHTMLRipper { private String getTerm(URL url) throws MalformedURLException { if (gidPattern == null) { - gidPattern = Pattern.compile("^https?://(www\\.)?xbooru\\.com/(index.php)?.*([?&]tags=([a-zA-Z0-9$_.+!*'(),%-]+))(&|(#.*)?$)"); + gidPattern = Pattern.compile("^https?://(www\\.)?(x|gel)booru\\.com/(index.php)?.*([?&]tags=([a-zA-Z0-9$_.+!*'(),%-]+))(&|(#.*)?$)"); } Matcher m = gidPattern.matcher(url.toExternalForm()); @@ -80,17 +91,17 @@ public class XbooruRipper extends AbstractHTMLRipper { return m.group(4); } - throw new MalformedURLException("Expected xbooru.com URL format: xbooru.com/index.php?tags=searchterm - got " + url + " instead"); + throw new MalformedURLException("Expected xbooru.com URL format: " + getHost() + ".com/index.php?tags=searchterm - got " + url + " instead"); } @Override public String getGID(URL url) throws MalformedURLException { try { - return Utils.filesystemSafe(new URI(getTerm(url)).getPath()); + return Utils.filesystemSafe(new URI(getTerm(url).replaceAll("&tags=", "")).getPath()); } catch (URISyntaxException ex) { logger.error(ex); } - throw new MalformedURLException("Expected xbooru.com URL format: xbooru.com/index.php?tags=searchterm - got " + url + " instead"); + throw new MalformedURLException("Expected xbooru.com URL format: " + getHost() + ".com/index.php?tags=searchterm - got " + url + " instead"); } -} +} \ No newline at end of file diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/GelbooruRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/GelbooruRipper.java deleted file mode 100644 index 9d3b483a..00000000 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/GelbooruRipper.java +++ /dev/null @@ -1,96 +0,0 @@ -package com.rarchives.ripme.ripper.rippers; - -import com.rarchives.ripme.ripper.AbstractHTMLRipper; -import 
com.rarchives.ripme.utils.Http; -import com.rarchives.ripme.utils.Utils; -import java.io.IOException; -import java.net.MalformedURLException; -import java.net.URI; -import java.net.URISyntaxException; -import java.net.URL; -import java.util.ArrayList; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import org.apache.log4j.Logger; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; - -public class GelbooruRipper extends AbstractHTMLRipper { - private static final Logger logger = Logger.getLogger(XbooruRipper.class); - - private static Pattern gidPattern = null; - - public GelbooruRipper(URL url) throws IOException { - super(url); - } - - @Override - public String getDomain() { - return "gelbooru.com"; - } - - @Override - public String getHost() { - return "gelbooru"; - } - - private String getPage(int num) throws MalformedURLException { - return "https://gelbooru.com/index.php?page=dapi&s=post&q=index&pid=" + num + "&tags=" + getTerm(url); - } - - @Override - public Document getFirstPage() throws IOException { - return Http.url(getPage(0)).get(); - } - - @Override - public Document getNextPage(Document doc) throws IOException { - int offset = Integer.parseInt(doc.getElementsByTag("posts").first().attr("offset")); - int num = Integer.parseInt(doc.getElementsByTag("posts").first().attr("count")); - - if (offset + 100 > num) { - return null; - } - - return Http.url(getPage(offset / 100 + 1)).get(); - } - - @Override - public List getURLsFromPage(Document page) { - List res = new ArrayList<>(100); - for (Element e : page.getElementsByTag("post")) { - res.add(e.absUrl("file_url") + "#" + e.attr("id")); - } - return res; - } - - @Override - public void downloadURL(URL url, int index) { - addURLToDownload(url, Utils.getConfigBoolean("download.save_order", true) ? 
url.getRef() + "-" : ""); - } - - private String getTerm(URL url) throws MalformedURLException { - if (gidPattern == null) { - gidPattern = Pattern.compile("^https?://(www\\.)?gelbooru\\.com/(index.php)?.*([?&]tags=([a-zA-Z0-9$_.+!*'(),%-]+))(&|(#.*)?$)"); - } - - Matcher m = gidPattern.matcher(url.toExternalForm()); - if (m.matches()) { - return m.group(4); - } - - throw new MalformedURLException("Expected gelbooru.com URL format: gelbooru.com/index.php?tags=searchterm - got " + url + " instead"); - } - - @Override - public String getGID(URL url) throws MalformedURLException { - try { - return Utils.filesystemSafe(new URI(getTerm(url)).getPath()); - } catch (URISyntaxException ex) { - logger.error(ex); - } - - throw new MalformedURLException("Expected gelbooru.com URL format: xbooru.com/index.php?tags=searchterm - got " + url + " instead"); - } -} \ No newline at end of file diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/XbooruRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/BooruRipperTest.java similarity index 57% rename from src/test/java/com/rarchives/ripme/tst/ripper/rippers/XbooruRipperTest.java rename to src/test/java/com/rarchives/ripme/tst/ripper/rippers/BooruRipperTest.java index 6dfc0bab..31041175 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/XbooruRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/BooruRipperTest.java @@ -3,17 +3,17 @@ package com.rarchives.ripme.tst.ripper.rippers; import java.io.IOException; import java.net.URL; -import com.rarchives.ripme.ripper.rippers.XbooruRipper; +import com.rarchives.ripme.ripper.rippers.BooruRipper; -public class XbooruRipperTest extends RippersTest { +public class BooruRipperTest extends RippersTest { public void testRip() throws IOException { - XbooruRipper ripper = new XbooruRipper(new URL("http://xbooru.com/index.php?page=post&s=list&tags=furry")); + BooruRipper ripper = new BooruRipper(new 
URL("http://xbooru.com/index.php?page=post&s=list&tags=furry")); testRipper(ripper); } public void testGetGID() throws IOException { URL url = new URL("http://xbooru.com/index.php?page=post&s=list&tags=furry"); - XbooruRipper ripper = new XbooruRipper(url); + BooruRipper ripper = new BooruRipper(url); assertEquals("furry", ripper.getGID(url)); } } \ No newline at end of file