From e6ea9a8fdfaa2ea3faf37f1ce1a48e0949ce9859 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Fri, 17 Aug 2018 09:25:19 -0400 Subject: [PATCH] Added ripper for Gelbooru.com --- .../ripme/ripper/rippers/GelbooruRipper.java | 96 +++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/GelbooruRipper.java diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/GelbooruRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/GelbooruRipper.java new file mode 100644 index 00000000..9d3b483a --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/GelbooruRipper.java @@ -0,0 +1,96 @@ +package com.rarchives.ripme.ripper.rippers; + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; +import com.rarchives.ripme.utils.Utils; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.log4j.Logger; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + +public class GelbooruRipper extends AbstractHTMLRipper { + private static final Logger logger = Logger.getLogger(XbooruRipper.class); + + private static Pattern gidPattern = null; + + public GelbooruRipper(URL url) throws IOException { + super(url); + } + + @Override + public String getDomain() { + return "gelbooru.com"; + } + + @Override + public String getHost() { + return "gelbooru"; + } + + private String getPage(int num) throws MalformedURLException { + return "https://gelbooru.com/index.php?page=dapi&s=post&q=index&pid=" + num + "&tags=" + getTerm(url); + } + + @Override + public Document getFirstPage() throws IOException { + return Http.url(getPage(0)).get(); + } + + @Override + public Document getNextPage(Document doc) throws IOException { + int offset = Integer.parseInt(doc.getElementsByTag("posts").first().attr("offset")); + int num = Integer.parseInt(doc.getElementsByTag("posts").first().attr("count")); + + if (offset + 100 > num) { + return null; + } + + return Http.url(getPage(offset / 100 + 1)).get(); + } + + @Override + public List getURLsFromPage(Document page) { + List res = new ArrayList<>(100); + for (Element e : page.getElementsByTag("post")) { + res.add(e.absUrl("file_url") + "#" + e.attr("id")); + } + return res; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, Utils.getConfigBoolean("download.save_order", true) ? url.getRef() + "-" : ""); + } + + private String getTerm(URL url) throws MalformedURLException { + if (gidPattern == null) { + gidPattern = Pattern.compile("^https?://(www\\.)?gelbooru\\.com/(index.php)?.*([?&]tags=([a-zA-Z0-9$_.+!*'(),%-]+))(&|(#.*)?$)"); + } + + Matcher m = gidPattern.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(4); + } + + throw new MalformedURLException("Expected gelbooru.com URL format: gelbooru.com/index.php?tags=searchterm - got " + url + " instead"); + } + + @Override + public String getGID(URL url) throws MalformedURLException { + try { + return Utils.filesystemSafe(new URI(getTerm(url)).getPath()); + } catch (URISyntaxException ex) { + logger.error(ex); + } + + throw new MalformedURLException("Expected gelbooru.com URL format: xbooru.com/index.php?tags=searchterm - got " + url + " instead"); + } +} \ No newline at end of file