diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/E621Ripper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/E621Ripper.java
new file mode 100644
index 00000000..190320f9
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/E621Ripper.java
@@ -0,0 +1,190 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.ripper.DownloadThreadPool;
+import com.rarchives.ripme.utils.Http;
+import com.rarchives.ripme.utils.Utils;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+/**
+ * Ripper for e621.net tag searches (/post/index/...) and pools (/pool/show/...).
+ */
+public class E621Ripper extends AbstractHTMLRipper {
+    private static final Logger LOGGER = Logger.getLogger(E621Ripper.class.getName());
+
+    /** Matches a tag-search listing URL; group 2 is the search term. */
+    private static final Pattern SEARCH_PATTERN = Pattern.compile(
+            "^https?://(www\\.)?e621\\.net/post/index/[^/]+/([a-zA-Z0-9$_.+!*'(),%-]+)(/.*)?(#.*)?$");
+
+    /** Matches a pool URL; group 2 is the pool identifier. */
+    private static final Pattern POOL_PATTERN = Pattern.compile(
+            "^https?://(www\\.)?e621\\.net/pool/show/([a-zA-Z0-9$_.+!*'(),%-]+)(\\?.*)?(/.*)?(#.*)?$");
+
+    /** Matches a /post/search?tags=... URL; group 2 is the raw tag string. */
+    private static final Pattern SEARCH_FORM_PATTERN = Pattern.compile(
+            "^https?://(www\\.)?e621\\.net/post/search\\?tags=([a-zA-Z0-9$_.+!*'(),%-]+)(/.*)?(#.*)?$");
+
+    private final DownloadThreadPool e621ThreadPool = new DownloadThreadPool("e621");
+
+    public E621Ripper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public DownloadThreadPool getThreadPool() {
+        return e621ThreadPool;
+    }
+
+    @Override
+    public String getDomain() {
+        return "e621.net";
+    }
+
+    @Override
+    public String getHost() {
+        return "e621";
+    }
+
+    /** Fetches the first listing page: the pool page for pool URLs, otherwise page 1 of the search. */
+    @Override
+    public Document getFirstPage() throws IOException {
+        if (url.getPath().startsWith("/pool/show/")) {
+            return Http.url("https://e621.net/pool/show/" + getTerm(url)).get();
+        }
+        return Http.url("https://e621.net/post/index/1/" + getTerm(url)).get();
+    }
+
+    /**
+     * Collects the post-page links found on a listing page.
+     *
+     * @param page a search or pool listing page
+     * @return one entry per usable thumbnail: post URL + "#" + the first child's id minus its first character
+     */
+    @Override
+    public List<String> getURLsFromPage(Document page) {
+        Elements thumbs = page.select("#post-list .thumb a,#pool-show .thumb a");
+        List<String> res = new ArrayList<String>(thumbs.size());
+
+        for (Element thumb : thumbs) {
+            String href = thumb.absUrl("href");
+            // Skip malformed thumbnails instead of failing the whole page: absUrl returns ""
+            // for a missing/unresolvable href, and child(0)/substring(1) would throw below.
+            if (href.length() == 0 || thumb.children().isEmpty()) {
+                continue;
+            }
+            String id = thumb.child(0).attr("id");
+            if (id.length() == 0) {
+                continue;
+            }
+            res.add(href + "#" + id.substring(1));
+        }
+
+        return res;
+    }
+
+    /** Follows the paginator link marked rel="next"; returns null when there is none. */
+    @Override
+    public Document getNextPage(Document page) throws IOException {
+        for (Element link : page.select("#paginator a")) {
+            if (link.attr("rel").equals("next")) {
+                return Http.url(link.absUrl("href")).get();
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Queues a task that loads the post page and downloads the "src" of its element with id "image".
+     *
+     * @param url post page URL; when "download.save_order" is enabled its fragment plus "-" is
+     *            passed to addURLToDownload as the prefix argument
+     * @param index not used by this implementation
+     */
+    @Override
+    public void downloadURL(final URL url, int index) {
+        e621ThreadPool.addThread(new Thread(new Runnable() {
+            public void run() {
+                try {
+                    Document page = Http.url(url).get();
+                    Element image = page.getElementById("image");
+                    if (image == null) {
+                        // getElementById returns null when the page has no such element.
+                        LOGGER.log(Level.WARNING, "No element with id \"image\" on {0}", url);
+                        return;
+                    }
+
+                    String prefix = "";
+                    if (Utils.getConfigBoolean("download.save_order", true)) {
+                        prefix = url.getRef() + "-";
+                    }
+                    addURLToDownload(new URL(image.absUrl("src")), prefix);
+                } catch (IOException ex) {
+                    LOGGER.log(Level.SEVERE, "Failed to process " + url, ex);
+                }
+            }
+        }));
+    }
+
+    /**
+     * Extracts the search term or pool id from a supported URL.
+     *
+     * @throws MalformedURLException if the URL matches neither supported format
+     */
+    private String getTerm(URL url) throws MalformedURLException {
+        String external = url.toExternalForm();
+
+        Matcher m = SEARCH_PATTERN.matcher(external);
+        if (m.matches()) {
+            return m.group(2);
+        }
+
+        m = POOL_PATTERN.matcher(external);
+        if (m.matches()) {
+            return m.group(2);
+        }
+
+        throw new MalformedURLException("Expected e621.net URL format: e621.net/post/index/1/searchterm - got " + url + " instead");
+    }
+
+    /**
+     * Builds the album identifier from the URL's term, prefixed with "pool_" for pool URLs.
+     *
+     * @throws MalformedURLException if the URL is unsupported or its term is not a valid URI
+     */
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        String prefix = url.getPath().startsWith("/pool/show/") ? "pool_" : "";
+        try {
+            return Utils.filesystemSafe(prefix + new URI(getTerm(url)).getPath());
+        } catch (URISyntaxException ex) {
+            LOGGER.log(Level.SEVERE, null, ex);
+        }
+
+        throw new MalformedURLException("Expected e621.net URL format: e621.net/post/index/1/searchterm - got " + url + " instead");
+    }
+
+    /** Rewrites /post/search?tags=... URLs to the /post/index/1/... form; other URLs pass through. */
+    @Override
+    public URL sanitizeURL(URL url) throws MalformedURLException {
+        Matcher m = SEARCH_FORM_PATTERN.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return new URL("https://e621.net/post/index/1/" + m.group(2).replace("+", "%20"));
+        }
+
+        return url;
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PahealRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PahealRipper.java
new file mode 100644
index 00000000..4db47885
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PahealRipper.java
@@ -0,0 +1,143 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+import com.rarchives.ripme.utils.Utils;
+import java.io.File;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+/**
+ * Ripper for rule34.paheal.net tag listings (/post/list/...).
+ */
+public class PahealRipper extends AbstractHTMLRipper {
+    private static final Logger LOGGER = Logger.getLogger(PahealRipper.class.getName());
+
+    /** Matches a tag listing URL; group 2 is the search term. */
+    private static final Pattern GID_PATTERN = Pattern.compile(
+            "^https?://(www\\.)?rule34\\.paheal\\.net/post/list/([a-zA-Z0-9$_.+!*'(),%-]+)(/.*)?(#.*)?$");
+
+    /** Cookies sent with every listing request; filled once during class initialisation. */
+    private static final Map<String, String> COOKIES = new HashMap<String, String>(1);
+
+    static {
+        COOKIES.put("ui-tnc-agreed", "true");
+    }
+
+    public PahealRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getDomain() {
+        return "rule34.paheal.net";
+    }
+
+    @Override
+    public String getHost() {
+        return "paheal";
+    }
+
+    /** Fetches page 1 of the listing for this ripper's search term. */
+    @Override
+    public Document getFirstPage() throws IOException {
+        return Http.url("http://rule34.paheal.net/post/list/" + getTerm(url) + "/1").cookies(COOKIES).get();
+    }
+
+    /** Follows the paginator link whose text is "next" (any case); returns null when there is none. */
+    @Override
+    public Document getNextPage(Document page) throws IOException {
+        for (Element link : page.select("#paginator a")) {
+            // equalsIgnoreCase avoids the default-locale dependence of toLowerCase().
+            if (link.text().equalsIgnoreCase("next")) {
+                return Http.url(link.absUrl("href")).cookies(COOKIES).get();
+            }
+        }
+
+        return null;
+    }
+
+    /** Collects the absolute hrefs of the thumbnail links that are not ".shm-thumb-link". */
+    @Override
+    public List<String> getURLsFromPage(Document page) {
+        Elements links = page.select(".shm-thumb.thumb>a").not(".shm-thumb-link");
+        List<String> res = new ArrayList<String>(links.size());
+
+        for (Element link : links) {
+            res.add(link.absUrl("href"));
+        }
+
+        return res;
+    }
+
+    /**
+     * Queues the URL for download into the working directory, naming the file after the
+     * URI-decoded last path segment and keeping its extension (".png" when it has none).
+     */
+    @Override
+    public void downloadURL(URL url, int index) {
+        try {
+            String path = url.getPath();
+            String name = path.substring(path.lastIndexOf('/') + 1);
+            String ext = ".png";
+
+            int dot = name.lastIndexOf('.');
+            if (dot >= 0) {
+                ext = name.substring(dot);
+                name = name.substring(0, dot);
+            }
+
+            File target = new File(workingDir.getCanonicalPath() + File.separator
+                    + Utils.filesystemSafe(new URI(name).getPath()) + ext);
+            addURLToDownload(url, target);
+        } catch (IOException ex) {
+            LOGGER.log(Level.SEVERE, "Could not queue " + url, ex);
+        } catch (URISyntaxException ex) {
+            LOGGER.log(Level.SEVERE, "Could not queue " + url, ex);
+        }
+    }
+
+    /**
+     * Extracts the search term from a listing URL.
+     *
+     * @throws MalformedURLException if the URL is not a rule34.paheal.net listing URL
+     */
+    private String getTerm(URL url) throws MalformedURLException {
+        Matcher m = GID_PATTERN.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return m.group(2);
+        }
+
+        throw new MalformedURLException("Expected paheal.net URL format: rule34.paheal.net/post/list/searchterm - got " + url + " instead");
+    }
+
+    /**
+     * Builds the album identifier from the URL's search term.
+     *
+     * @throws MalformedURLException if the URL is unsupported or its term is not a valid URI
+     */
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        try {
+            return Utils.filesystemSafe(new URI(getTerm(url)).getPath());
+        } catch (URISyntaxException ex) {
+            LOGGER.log(Level.SEVERE, null, ex);
+        }
+
+        throw new MalformedURLException("Expected paheal.net URL format: rule34.paheal.net/post/list/searchterm - got " + url + " instead");
+    }
+}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/XbooruRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/XbooruRipper.java
new file mode 100644
index 00000000..6d7d70fc
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/XbooruRipper.java
@@ -0,0 +1,142 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+import com.rarchives.ripme.utils.Utils;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+/**
+ * Ripper for xbooru.com tag searches, read through the site's "dapi" post index.
+ */
+public class XbooruRipper extends AbstractHTMLRipper {
+    private static final Logger LOGGER = Logger.getLogger(XbooruRipper.class.getName());
+
+    /** Page size this ripper assumes when paging through the post index. */
+    private static final int POSTS_PER_PAGE = 100;
+
+    /**
+     * Matches an xbooru.com URL carrying a "tags" query parameter; group 4 is the search term.
+     * Parameters after "tags" are allowed, since Matcher.matches() must consume the whole URL.
+     */
+    private static final Pattern GID_PATTERN = Pattern.compile(
+            "^https?://(www\\.)?xbooru\\.com/(index\\.php)?.*([?&]tags=([a-zA-Z0-9$_.+!*'(),%-]+))(&.*)?(#.*)?$");
+
+    public XbooruRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getDomain() {
+        return "xbooru.com";
+    }
+
+    @Override
+    public String getHost() {
+        return "xbooru";
+    }
+
+    /** Builds the post-index URL for the given page number (the first page is 0) and this ripper's term. */
+    private String getPage(int num) throws MalformedURLException {
+        return "http://xbooru.com/index.php?page=dapi&s=post&q=index&pid=" + num + "&tags=" + getTerm(url);
+    }
+
+    @Override
+    public Document getFirstPage() throws IOException {
+        return Http.url(getPage(0)).get();
+    }
+
+    /**
+     * Fetches the page after {@code doc}, using the "offset" and "count" attributes of its
+     * "posts" element.
+     *
+     * @return the next page, or null when the current page already reaches "count"
+     * @throws IOException if the request fails or the response lacks usable paging attributes
+     */
+    @Override
+    public Document getNextPage(Document doc) throws IOException {
+        Element posts = doc.getElementsByTag("posts").first();
+        if (posts == null) {
+            throw new IOException("xbooru response has no \"posts\" element");
+        }
+
+        int offset;
+        int count;
+        try {
+            offset = Integer.parseInt(posts.attr("offset"));
+            count = Integer.parseInt(posts.attr("count"));
+        } catch (NumberFormatException ex) {
+            throw new IOException("xbooru response has a non-numeric offset/count", ex);
+        }
+
+        // >= (not >): if offset + page size equals count, presumably no posts remain - confirm.
+        if (offset + POSTS_PER_PAGE >= count) {
+            return null;
+        }
+
+        return Http.url(getPage(offset / POSTS_PER_PAGE + 1)).get();
+    }
+
+    /** Collects each post's "file_url", suffixed with "#" and the post's "id" attribute. */
+    @Override
+    public List<String> getURLsFromPage(Document page) {
+        List<String> res = new ArrayList<String>(POSTS_PER_PAGE);
+        for (Element post : page.getElementsByTag("post")) {
+            String fileUrl = post.absUrl("file_url");
+            // absUrl returns "" when the attribute is missing or cannot be resolved; skip those.
+            if (fileUrl.length() == 0) {
+                continue;
+            }
+            res.add(fileUrl + "#" + post.attr("id"));
+        }
+        return res;
+    }
+
+    /** Queues the URL, passing its fragment plus "-" as the prefix when "download.save_order" is on. */
+    @Override
+    public void downloadURL(URL url, int index) {
+        String prefix = Utils.getConfigBoolean("download.save_order", true) ? url.getRef() + "-" : "";
+        addURLToDownload(url, prefix);
+    }
+
+    /**
+     * Extracts the value of the "tags" query parameter.
+     *
+     * @throws MalformedURLException if the URL is not an xbooru.com URL with a "tags" parameter
+     */
+    private String getTerm(URL url) throws MalformedURLException {
+        Matcher m = GID_PATTERN.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return m.group(4);
+        }
+
+        throw new MalformedURLException("Expected xbooru.com URL format: xbooru.com/index.php?tags=searchterm - got " + url + " instead");
+    }
+
+    /**
+     * Builds the album identifier from the URL's search term.
+     *
+     * @throws MalformedURLException if the URL is unsupported or its term is not a valid URI
+     */
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        try {
+            return Utils.filesystemSafe(new URI(getTerm(url)).getPath());
+        } catch (URISyntaxException ex) {
+            LOGGER.log(Level.SEVERE, null, ex);
+        }
+
+        throw new MalformedURLException("Expected xbooru.com URL format: xbooru.com/index.php?tags=searchterm - got " + url + " instead");
+    }
+}