diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/E621Ripper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/E621Ripper.java index 407539b4..231533e2 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/E621Ripper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/E621Ripper.java @@ -1,169 +1,142 @@ -// package com.rarchives.ripme.ripper.rippers; -// -// import com.rarchives.ripme.ripper.AbstractHTMLRipper; -// import com.rarchives.ripme.ripper.DownloadThreadPool; -// import com.rarchives.ripme.utils.Http; -// import com.rarchives.ripme.utils.Utils; -// import java.io.IOException; -// import java.io.UnsupportedEncodingException; -// import java.net.MalformedURLException; -// import java.net.URL; -// import java.net.URLDecoder; -// import java.util.ArrayList; -// import java.util.List; -// import java.util.logging.Level; -// import java.util.logging.Logger; -// import org.jsoup.nodes.Document; -// import org.jsoup.nodes.Element; -// import org.jsoup.select.Elements; -// -// public class E621Ripper extends AbstractHTMLRipper { -// private static final int POOL_IMAGES_PER_PAGE = 24; -// -// private DownloadThreadPool e621ThreadPool = new DownloadThreadPool("e621"); -// -// private E621Ripper(URL url) throws IOException { -// super(url); -// } -// -// @Override -// public DownloadThreadPool getThreadPool() { -// return e621ThreadPool; -// } -// -// @Override -// public String getDomain() { -// return "e621.net"; -// } -// -// @Override -// public String getHost() { -// return "e621"; -// } -// -// @Override -// public Document getFirstPage() throws IOException { -// if (url.getPath().startsWith("/pool/show/")) { -// return Http.url("https://e621.net/pool/show/" + getTerm(url)).get(); -// } else { -// return Http.url("https://e621.net/post/index/1/" + getTerm(url)).get(); -// } -// } -// -// @Override -// public List getURLsFromPage(Document page) { -// Elements elements = page.select("#post-list .thumb a,#pool-show .thumb a"); -// List res = new ArrayList<>(elements.size()); -// -// if (page.getElementById("pool-show") != null) { -// int index = 0; -// -// Element e = page.getElementById("paginator"); -// if (e != null) { -// e = e.getElementsByClass("current").first(); -// if (e != null) { -// index = (Integer.parseInt(e.text()) - 1) * POOL_IMAGES_PER_PAGE; -// } -// } -// -// for (Element e_ : elements) { -// res.add(e_.absUrl("href") + "#" + ++index); -// } -// -// } else { -// for (Element e : elements) { -// res.add(e.absUrl("href") + "#" + e.child(0).attr("id").substring(1)); -// } -// } -// -// return res; -// } -// -// @Override -// public Document getNextPage(Document page) throws IOException { -// for (Element e : page.select("#paginator a")) { -// if (e.attr("rel").equals("next")) { -// return Http.url(e.absUrl("href")).get(); -// } -// } -// -// return null; -// } -// -// @Override -// public void downloadURL(final URL url, int index) { -// e621ThreadPool.addThread(new Thread(() -> { -// try { -// Document page = Http.url(url).get(); -// Element e = page.getElementById("image"); -// -// if (e != null) { -// addURLToDownload(new URL(e.absUrl("src")), Utils.getConfigBoolean("download.save_order", true) ? url.getRef() + "-" : ""); -// } else if ((e = page.select(".content object>param[name=\"movie\"]").first()) != null) { -// addURLToDownload(new URL(e.absUrl("value")), Utils.getConfigBoolean("download.save_order", true) ? url.getRef() + "-" : ""); -// } else { -// Logger.getLogger(E621Ripper.class.getName()).log(Level.WARNING, "Unsupported media type - please report to program author: " + url.toString()); -// } -// -// } catch (IOException ex) { -// Logger.getLogger(E621Ripper.class.getName()).log(Level.SEVERE, null, ex); -// } -// })); -// } -// -// private String getTerm(URL url) throws MalformedURLException { -// String query = url.getQuery(); -// -// if (query != null) { -// return Utils.parseUrlQuery(query, "tags"); -// } -// -// if (query == null) { -// if ((query = url.getPath()).startsWith("/post/index/")) { -// query = query.substring(12); -// -// int pos = query.indexOf('/'); -// if (pos == -1) { -// return null; -// } -// -// // skip page number -// query = query.substring(pos + 1); -// -// if (query.endsWith("/")) { -// query = query.substring(0, query.length() - 1); -// } -// -// try { -// return URLDecoder.decode(query, "UTF-8"); -// } catch (UnsupportedEncodingException e) { -// // Shouldn't happen since UTF-8 is required to be supported -// throw new RuntimeException(e); -// } -// -// } else if (query.startsWith("/pool/show/")) { -// query = query.substring(11); -// -// if (query.endsWith("/")) { -// query = query.substring(0, query.length() - 1); -// } -// -// return query; -// } -// } -// -// return null; -// } -// -// @Override -// public String getGID(URL url) throws MalformedURLException { -// String prefix = ""; -// if (url.getPath().startsWith("/pool/show/")) { -// prefix = "pool_"; -// } else { -// prefix = "term_"; -// } -// -// return Utils.filesystemSafe(prefix + getTerm(url)); -// } -// } + +package com.rarchives.ripme.ripper.rippers; + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.ripper.DownloadThreadPool; +import com.rarchives.ripme.utils.Http; +import com.rarchives.ripme.utils.Utils; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +/** + * + * @author + */ +public class E621Ripper extends AbstractHTMLRipper{ + private static Pattern gidPattern=null; + private static Pattern gidPattern2=null; + private static Pattern gidPatternPool=null; + + private DownloadThreadPool e621ThreadPool=new DownloadThreadPool("e621"); + + public E621Ripper(URL url) throws IOException { + super(url); + } + + @Override + public DownloadThreadPool getThreadPool() { + return e621ThreadPool; + } + + @Override + public String getDomain() { + return "e621.net"; + } + + @Override + public String getHost() { + return "e621"; + } + + @Override + public Document getFirstPage() throws IOException { + if(url.getPath().startsWith("/pool/show/")) + return Http.url("https://e621.net/pool/show/"+getTerm(url)).get(); + else + return Http.url("https://e621.net/post/index/1/"+getTerm(url)).get(); + } + + @Override + public List getURLsFromPage(Document page) { + Elements elements=page.select("#post-list .thumb a,#pool-show .thumb a"); + List res=new ArrayList(elements.size()); + + for(Element e:elements){ + res.add(e.absUrl("href")+"#"+e.child(0).attr("id").substring(1)); + } + + return res; + } + + @Override + public Document getNextPage(Document page) throws IOException { + for(Element e:page.select("#paginator a")){ + if(e.attr("rel").equals("next")) + return Http.url(e.absUrl("href")).get(); + } + + return null; + } + + @Override + public void downloadURL(final URL url, int index) { + e621ThreadPool.addThread(new Thread(new Runnable() { + public void run() { + try { + Document page=Http.url(url).get(); + + addURLToDownload(new URL(page.getElementById("image").absUrl("src")),Utils.getConfigBoolean("download.save_order",true)?url.getRef()+"-":""); + } catch (IOException ex) { + Logger.getLogger(E621Ripper.class.getName()).log(Level.SEVERE, null, ex); + } + } + })); + } + + private String getTerm(URL url) throws MalformedURLException{ + if(gidPattern==null) + gidPattern=Pattern.compile("^https?://(www\\.)?e621\\.net/post/index/[^/]+/([a-zA-Z0-9$_.+!*'(),%-]+)(/.*)?(#.*)?$"); + if(gidPatternPool==null) + gidPatternPool=Pattern.compile("^https?://(www\\.)?e621\\.net/pool/show/([a-zA-Z0-9$_.+!*'(),%-]+)(\\?.*)?(/.*)?(#.*)?$"); + + Matcher m = gidPattern.matcher(url.toExternalForm()); + if(m.matches()) + return m.group(2); + + m = gidPatternPool.matcher(url.toExternalForm()); + if(m.matches()) + return m.group(2); + + throw new MalformedURLException("Expected e621.net URL format: e621.net/post/index/1/searchterm - got "+url+" instead"); + } + + @Override + public String getGID(URL url) throws MalformedURLException { + try { + String prefix=""; + if(url.getPath().startsWith("/pool/show/")) + prefix="pool_"; + + return Utils.filesystemSafe(prefix+new URI(getTerm(url)).getPath()); + } catch (URISyntaxException ex) { + Logger.getLogger(PahealRipper.class.getName()).log(Level.SEVERE, null, ex); + } + + throw new MalformedURLException("Expected e621.net URL format: e621.net/post/index/1/searchterm - got "+url+" instead"); + } + + @Override + public URL sanitizeURL(URL url) throws MalformedURLException { + if(gidPattern2==null) + gidPattern2=Pattern.compile("^https?://(www\\.)?e621\\.net/post/search\\?tags=([a-zA-Z0-9$_.+!*'(),%-]+)(/.*)?(#.*)?$"); + + Matcher m = gidPattern2.matcher(url.toExternalForm()); + if(m.matches()) + return new URL("https://e621.net/post/index/1/"+m.group(2).replace("+","%20")); + + return url; + } + +}