From bbc78ef67f24a222dcdd6adef97b61e4feec1f00 Mon Sep 17 00:00:00 2001
From: kas-luthor
Date: Thu, 26 Nov 2015 22:18:24 +0100
Subject: [PATCH] added ripper for xbooru.com removed license header for paheal
 ripper added percent sign to url matching for paheal ripper

---
 Review notes (free text in this position is ignored by git am):
   * XbooruRipper: the URL pattern now accepts parameters after "tags=",
     paging stops on an exact multiple of the page size, and malformed API
     responses are reported as IOException.
   * The blob hash on the XbooruRipper "index" line predates these changes.

 .../ripme/ripper/rippers/PahealRipper.java    |   8 +-
 .../ripme/ripper/rippers/XbooruRipper.java    | 122 ++++++++++++++++++
 2 files changed, 123 insertions(+), 7 deletions(-)
 create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/XbooruRipper.java

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PahealRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PahealRipper.java
index 16591eb1..520914a8 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/PahealRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PahealRipper.java
@@ -1,9 +1,3 @@
-/*
- * To change this license header, choose License Headers in Project Properties.
- * To change this template file, choose Tools | Templates
- * and open the template in the editor.
- */
-
 package com.rarchives.ripme.ripper.rippers;
 
 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
@@ -92,7 +86,7 @@ public class PahealRipper extends AbstractHTMLRipper{
     @Override
     public String getGID(URL url) throws MalformedURLException {
         if(gidPattern==null)
-            gidPattern=Pattern.compile("^https?://(www\\.)?rule34\\.paheal\\.net/post/list/([a-zA-Z0-9$_.+!*'(),-]+)(/.*)?(#.*)?$");
+            gidPattern=Pattern.compile("^https?://(www\\.)?rule34\\.paheal\\.net/post/list/([a-zA-Z0-9$_.+!*'(),%-]+)(/.*)?(#.*)?$");
 
         Matcher m = gidPattern.matcher(url.toExternalForm());
         if(m.matches())
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/XbooruRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/XbooruRipper.java
new file mode 100644
index 00000000..dcdf54f5
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/XbooruRipper.java
@@ -0,0 +1,122 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+import com.rarchives.ripme.utils.Utils;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+/**
+ * Rips the posts matching a tag search on xbooru.com, reading the site's
+ * "dapi" endpoint page by page.
+ *
+ * @author kas-luthor
+ */
+public class XbooruRipper extends AbstractHTMLRipper {
+    /** Page size assumed by the paging arithmetic and the URL list capacity. */
+    private static final int POSTS_PER_PAGE = 100;
+
+    /**
+     * Matches an xbooru.com URL carrying a "tags" query parameter; group 3 is
+     * the tag string. Compiled once, at class initialization, so it is ready
+     * no matter how early getGID is invoked.
+     */
+    private static final Pattern GID_PATTERN = Pattern.compile(
+            "^https?://(www\\.)?xbooru\\.com/(index\\.php)?.*[?&]tags=([a-zA-Z0-9$_.+!*'(),%-]+)(&.*)?(#.*)?$");
+
+    public XbooruRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getDomain() {
+        return "xbooru.com";
+    }
+
+    @Override
+    public String getHost() {
+        return "xbooru";
+    }
+
+    /** Builds the API URL for result page {@code num} of this rip's tag search. */
+    private String getPage(int num) throws MalformedURLException {
+        return "http://xbooru.com/index.php?page=dapi&s=post&q=index&pid=" + num + "&tags=" + getGID(url);
+    }
+
+    @Override
+    public Document getFirstPage() throws IOException {
+        return Http.url(getPage(0)).get();
+    }
+
+    /**
+     * Fetches the page after {@code doc}, or returns null once the last page has been read.
+     *
+     * @throws IOException if {@code doc} lacks a usable {@code <posts>} element or the fetch fails
+     */
+    @Override
+    public Document getNextPage(Document doc) throws IOException {
+        Element posts = doc.getElementsByTag("posts").first();
+        if (posts == null) {
+            throw new IOException("No <posts> element in xbooru API response");
+        }
+
+        int offset;
+        int count;
+        try {
+            offset = Integer.parseInt(posts.attr("offset"));
+            count = Integer.parseInt(posts.attr("count"));
+        } catch (NumberFormatException e) {
+            throw new IOException("Unreadable offset/count in xbooru API response", e);
+        }
+
+        // ">=" rather than ">": assuming count is the total number of matching posts,
+        // a total that is an exact multiple of the page size has no further page.
+        if (offset + POSTS_PER_PAGE >= count) {
+            return null;
+        }
+
+        return Http.url(getPage(offset / POSTS_PER_PAGE + 1)).get();
+    }
+
+    /** Collects one {@code file_url#id} string per {@code <post>} element of {@code page}. */
+    @Override
+    public List<String> getURLsFromPage(Document page) {
+        List<String> res = new ArrayList<String>(POSTS_PER_PAGE);
+        for (Element e : page.getElementsByTag("post")) {
+            res.add(e.attr("file_url") + "#" + e.attr("id"));
+        }
+        return res;
+    }
+
+    @Override
+    public void downloadURL(URL url, int index) {
+        // The URL fragment is the post id appended by getURLsFromPage; with
+        // download.save_order enabled it is passed on as "<id>-".
+        String prefix = Utils.getConfigBoolean("download.save_order", true) ? url.getRef() + "-" : "";
+        addURLToDownload(url, prefix);
+    }
+
+    /**
+     * Extracts the tag string from {@code url}; getPage reuses it to build API URLs.
+     *
+     * @throws MalformedURLException if {@code url} is not an xbooru.com URL with a tags parameter
+     */
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        Matcher m = GID_PATTERN.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return m.group(3);
+        }
+
+        throw new MalformedURLException(
+                "Expected xbooru.com URL format: xbooru.com/index.php?page=post&s=list&tags=searchterm - got "
+                        + url + " instead");
+    }
+}