Rolled E621Ripper back from 1.6.0 to the 1.5.15 version

This commit is contained in:
cyian-1756 2017-10-26 04:47:14 -04:00
parent 3db32d2ffc
commit 3f3f7d6031

View File

@ -1,169 +1,142 @@
// package com.rarchives.ripme.ripper.rippers;
//
// import com.rarchives.ripme.ripper.AbstractHTMLRipper;
// import com.rarchives.ripme.ripper.DownloadThreadPool;
// import com.rarchives.ripme.utils.Http;
// import com.rarchives.ripme.utils.Utils;
// import java.io.IOException;
// import java.io.UnsupportedEncodingException;
// import java.net.MalformedURLException;
// import java.net.URL;
// import java.net.URLDecoder;
// import java.util.ArrayList;
// import java.util.List;
// import java.util.logging.Level;
// import java.util.logging.Logger;
// import org.jsoup.nodes.Document;
// import org.jsoup.nodes.Element;
// import org.jsoup.select.Elements;
//
// public class E621Ripper extends AbstractHTMLRipper {
// private static final int POOL_IMAGES_PER_PAGE = 24;
//
// private DownloadThreadPool e621ThreadPool = new DownloadThreadPool("e621");
//
// private E621Ripper(URL url) throws IOException {
// super(url);
// }
//
// @Override
// public DownloadThreadPool getThreadPool() {
// return e621ThreadPool;
// }
//
// @Override
// public String getDomain() {
// return "e621.net";
// }
//
// @Override
// public String getHost() {
// return "e621";
// }
//
// @Override
// public Document getFirstPage() throws IOException {
// if (url.getPath().startsWith("/pool/show/")) {
// return Http.url("https://e621.net/pool/show/" + getTerm(url)).get();
// } else {
// return Http.url("https://e621.net/post/index/1/" + getTerm(url)).get();
// }
// }
//
// @Override
// public List<String> getURLsFromPage(Document page) {
// Elements elements = page.select("#post-list .thumb a,#pool-show .thumb a");
// List<String> res = new ArrayList<>(elements.size());
//
// if (page.getElementById("pool-show") != null) {
// int index = 0;
//
// Element e = page.getElementById("paginator");
// if (e != null) {
// e = e.getElementsByClass("current").first();
// if (e != null) {
// index = (Integer.parseInt(e.text()) - 1) * POOL_IMAGES_PER_PAGE;
// }
// }
//
// for (Element e_ : elements) {
// res.add(e_.absUrl("href") + "#" + ++index);
// }
//
// } else {
// for (Element e : elements) {
// res.add(e.absUrl("href") + "#" + e.child(0).attr("id").substring(1));
// }
// }
//
// return res;
// }
//
// @Override
// public Document getNextPage(Document page) throws IOException {
// for (Element e : page.select("#paginator a")) {
// if (e.attr("rel").equals("next")) {
// return Http.url(e.absUrl("href")).get();
// }
// }
//
// return null;
// }
//
// @Override
// public void downloadURL(final URL url, int index) {
// e621ThreadPool.addThread(new Thread(() -> {
// try {
// Document page = Http.url(url).get();
// Element e = page.getElementById("image");
//
// if (e != null) {
// addURLToDownload(new URL(e.absUrl("src")), Utils.getConfigBoolean("download.save_order", true) ? url.getRef() + "-" : "");
// } else if ((e = page.select(".content object>param[name=\"movie\"]").first()) != null) {
// addURLToDownload(new URL(e.absUrl("value")), Utils.getConfigBoolean("download.save_order", true) ? url.getRef() + "-" : "");
// } else {
// Logger.getLogger(E621Ripper.class.getName()).log(Level.WARNING, "Unsupported media type - please report to program author: " + url.toString());
// }
//
// } catch (IOException ex) {
// Logger.getLogger(E621Ripper.class.getName()).log(Level.SEVERE, null, ex);
// }
// }));
// }
//
// private String getTerm(URL url) throws MalformedURLException {
// String query = url.getQuery();
//
// if (query != null) {
// return Utils.parseUrlQuery(query, "tags");
// }
//
// if (query == null) {
// if ((query = url.getPath()).startsWith("/post/index/")) {
// query = query.substring(12);
//
// int pos = query.indexOf('/');
// if (pos == -1) {
// return null;
// }
//
// // skip page number
// query = query.substring(pos + 1);
//
// if (query.endsWith("/")) {
// query = query.substring(0, query.length() - 1);
// }
//
// try {
// return URLDecoder.decode(query, "UTF-8");
// } catch (UnsupportedEncodingException e) {
// // Shouldn't happen since UTF-8 is required to be supported
// throw new RuntimeException(e);
// }
//
// } else if (query.startsWith("/pool/show/")) {
// query = query.substring(11);
//
// if (query.endsWith("/")) {
// query = query.substring(0, query.length() - 1);
// }
//
// return query;
// }
// }
//
// return null;
// }
//
// @Override
// public String getGID(URL url) throws MalformedURLException {
// String prefix = "";
// if (url.getPath().startsWith("/pool/show/")) {
// prefix = "pool_";
// } else {
// prefix = "term_";
// }
//
// return Utils.filesystemSafe(prefix + getTerm(url));
// }
// }
package com.rarchives.ripme.ripper.rippers;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Ripper for e621.net tag searches and pools.
 *
 * <p>URL shapes recognised by this class:
 * <ul>
 *   <li>{@code http(s)://[www.]e621.net/post/index/<page>/<tags>}</li>
 *   <li>{@code http(s)://[www.]e621.net/pool/show/<id>}</li>
 *   <li>{@code http(s)://[www.]e621.net/post/search?tags=<tags>}, which
 *       {@link #sanitizeURL(URL)} rewrites to the {@code /post/index/1/} form</li>
 * </ul>
 */
public class E621Ripper extends AbstractHTMLRipper {
    /** java.util.logging logger used for failures raised by this ripper. */
    private static final Logger E621_LOG = Logger.getLogger(E621Ripper.class.getName());

    // The patterns are compiled once at class-load time. The previous lazy initialisation
    // wrote to shared static fields without synchronisation.

    /** Matches {@code /post/index/<page>/<tags>} URLs; group 2 is the tag term. */
    private static final Pattern GID_PATTERN = Pattern.compile(
            "^https?://(www\\.)?e621\\.net/post/index/[^/]+/([a-zA-Z0-9$_.+!*'(),%-]+)(/.*)?(#.*)?$");

    /** Matches {@code /post/search?tags=<tags>} URLs; group 2 is the tag term. */
    private static final Pattern GID_PATTERN_SEARCH = Pattern.compile(
            "^https?://(www\\.)?e621\\.net/post/search\\?tags=([a-zA-Z0-9$_.+!*'(),%-]+)(/.*)?(#.*)?$");

    /** Matches {@code /pool/show/<id>} URLs; group 2 is the pool identifier. */
    private static final Pattern GID_PATTERN_POOL = Pattern.compile(
            "^https?://(www\\.)?e621\\.net/pool/show/([a-zA-Z0-9$_.+!*'(),%-]+)(\\?.*)?(/.*)?(#.*)?$");

    /** Thread pool on which the per-post page fetches of {@link #downloadURL(URL, int)} run. */
    private DownloadThreadPool e621ThreadPool = new DownloadThreadPool("e621");

    /**
     * Creates a ripper for the given URL.
     *
     * @param url the album URL handed to the superclass constructor
     * @throws IOException if the superclass constructor fails
     */
    public E621Ripper(URL url) throws IOException {
        super(url);
    }

    /** @return the thread pool dedicated to this ripper */
    @Override
    public DownloadThreadPool getThreadPool() {
        return e621ThreadPool;
    }

    /** @return the domain this ripper handles, {@code "e621.net"} */
    @Override
    public String getDomain() {
        return "e621.net";
    }

    /** @return the short host name, {@code "e621"} */
    @Override
    public String getHost() {
        return "e621";
    }

    /**
     * Fetches the first listing page: the pool page for {@code /pool/show/} URLs,
     * otherwise page 1 of the post index for the extracted term.
     *
     * @return the parsed first page
     * @throws IOException if the request fails or the URL is not in a recognised format
     */
    @Override
    public Document getFirstPage() throws IOException {
        if (url.getPath().startsWith("/pool/show/")) {
            return Http.url("https://e621.net/pool/show/" + getTerm(url)).get();
        } else {
            return Http.url("https://e621.net/post/index/1/" + getTerm(url)).get();
        }
    }

    /**
     * Collects the post-page links from a listing page.
     *
     * <p>Each returned URL is the thumbnail link's absolute {@code href} followed by
     * {@code #} and the {@code id} attribute of the link's first child with its first
     * character removed. {@link #downloadURL(URL, int)} later reads that fragment back
     * via {@link URL#getRef()} to build the file-name prefix.
     *
     * @param page a listing page
     * @return one URL per thumbnail link, in document order
     */
    @Override
    public List<String> getURLsFromPage(Document page) {
        Elements elements = page.select("#post-list .thumb a,#pool-show .thumb a");
        List<String> res = new ArrayList<String>(elements.size());
        for (Element e : elements) {
            res.add(e.absUrl("href") + "#" + e.child(0).attr("id").substring(1));
        }
        return res;
    }

    /**
     * Follows the paginator link marked {@code rel="next"}.
     *
     * @param page the current listing page
     * @return the next listing page, or {@code null} if the paginator has no such link
     * @throws IOException if fetching the next page fails
     */
    @Override
    public Document getNextPage(Document page) throws IOException {
        for (Element e : page.select("#paginator a")) {
            if (e.attr("rel").equals("next")) {
                return Http.url(e.absUrl("href")).get();
            }
        }
        return null;
    }

    /**
     * Queues a task that loads a post page and schedules its {@code #image} source for download.
     *
     * <p>When the {@code download.save_order} setting is true (its default here), the
     * file-name prefix is the URL fragment followed by {@code -}; otherwise it is empty.
     *
     * @param url   the post page URL, carrying the fragment added by {@link #getURLsFromPage(Document)}
     * @param index unused by this implementation
     */
    @Override
    public void downloadURL(final URL url, int index) {
        e621ThreadPool.addThread(new Thread(new Runnable() {
            @Override
            public void run() {
                try {
                    Document page = Http.url(url).get();
                    Element image = page.getElementById("image");
                    if (image == null) {
                        // getElementById returns null when the page has no #image element;
                        // skip the post instead of dying with a NullPointerException.
                        E621_LOG.log(Level.WARNING, "No #image element found, skipping: " + url);
                        return;
                    }
                    addURLToDownload(new URL(image.absUrl("src")),
                            Utils.getConfigBoolean("download.save_order", true) ? url.getRef() + "-" : "");
                } catch (IOException ex) {
                    E621_LOG.log(Level.SEVERE, null, ex);
                }
            }
        }));
    }

    /**
     * Extracts the still URL-encoded search term or pool identifier from a URL.
     *
     * @param url a {@code /post/index/<page>/<term>} or {@code /pool/show/<id>} URL
     * @return the captured term or pool identifier
     * @throws MalformedURLException if the URL matches neither format
     */
    private String getTerm(URL url) throws MalformedURLException {
        String external = url.toExternalForm();

        Matcher m = GID_PATTERN.matcher(external);
        if (m.matches()) {
            return m.group(2);
        }

        m = GID_PATTERN_POOL.matcher(external);
        if (m.matches()) {
            return m.group(2);
        }

        throw new MalformedURLException("Expected e621.net URL format: e621.net/post/index/1/searchterm - got " + url + " instead");
    }

    /**
     * Builds the album identifier: the decoded term, prefixed with {@code pool_} for
     * pool URLs, passed through {@code Utils.filesystemSafe}.
     *
     * @param url the album URL
     * @return the identifier for this album
     * @throws MalformedURLException if the URL is not in a recognised format or the
     *         extracted term cannot be parsed as a URI
     */
    @Override
    public String getGID(URL url) throws MalformedURLException {
        String prefix = "";
        if (url.getPath().startsWith("/pool/show/")) {
            prefix = "pool_";
        }
        String term = getTerm(url);
        try {
            // URI#getPath returns the decoded form, turning %xx escapes into characters.
            return Utils.filesystemSafe(prefix + new URI(term).getPath());
        } catch (URISyntaxException ex) {
            E621_LOG.log(Level.SEVERE, null, ex);
            MalformedURLException failure = new MalformedURLException(
                    "Expected e621.net URL format: e621.net/post/index/1/searchterm - got " + url + " instead");
            // MalformedURLException has no cause-taking constructor, so attach it explicitly.
            failure.initCause(ex);
            throw failure;
        }
    }

    /**
     * Rewrites {@code /post/search?tags=<tags>} URLs to
     * {@code https://e621.net/post/index/1/<tags>} with every {@code +} replaced by
     * {@code %20}; any other URL is returned unchanged.
     *
     * @param url the URL as supplied
     * @return the rewritten URL, or {@code url} itself if it is not a search URL
     * @throws MalformedURLException if the rewritten URL cannot be constructed
     */
    @Override
    public URL sanitizeURL(URL url) throws MalformedURLException {
        Matcher m = GID_PATTERN_SEARCH.matcher(url.toExternalForm());
        if (m.matches()) {
            return new URL("https://e621.net/post/index/1/" + m.group(2).replace("+", "%20"));
        }
        return url;
    }
}