From 2f4793e9e39145f070608328016bf8c10b7a8b35 Mon Sep 17 00:00:00 2001 From: kas-luthor Date: Sat, 14 Jan 2017 22:45:23 +0100 Subject: [PATCH 01/26] Added utility functions for parsing URL queries Rewrote E621Ripper to not use regexes anymore (therefore interacting better with special chars in URLs) --- .../ripme/ripper/rippers/E621Ripper.java | 139 +++++++++++------- .../java/com/rarchives/ripme/utils/Utils.java | 69 +++++++++ 2 files changed, 151 insertions(+), 57 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/E621Ripper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/E621Ripper.java index 190320f9..6f6731d9 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/E621Ripper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/E621Ripper.java @@ -6,10 +6,12 @@ import com.rarchives.ripme.ripper.DownloadThreadPool; import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; import java.io.IOException; +import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URI; import java.net.URISyntaxException; import java.net.URL; +import java.net.URLDecoder; import java.util.ArrayList; import java.util.List; import java.util.logging.Level; @@ -25,69 +27,85 @@ import org.jsoup.select.Elements; * @author */ public class E621Ripper extends AbstractHTMLRipper{ - private static Pattern gidPattern=null; - private static Pattern gidPattern2=null; - private static Pattern gidPatternPool=null; +public static final int POOL_IMAGES_PER_PAGE = 24; - private DownloadThreadPool e621ThreadPool=new DownloadThreadPool("e621"); + private DownloadThreadPool e621ThreadPool = new DownloadThreadPool("e621"); public E621Ripper(URL url) throws IOException { super(url); } - + @Override public DownloadThreadPool getThreadPool() { return e621ThreadPool; } - + @Override public String getDomain() { return "e621.net"; } - + @Override public String getHost() { return "e621"; } - + @Override public Document getFirstPage() throws IOException { - if(url.getPath().startsWith("/pool/show/")) - return Http.url("https://e621.net/pool/show/"+getTerm(url)).get(); + if (url.getPath().startsWith("/pool/show/")) + return Http.url("https://e621.net/pool/show/" + getTerm(url)).get(); else - return Http.url("https://e621.net/post/index/1/"+getTerm(url)).get(); + return Http.url("https://e621.net/post/index/1/" + getTerm(url)).get(); } - + @Override public List getURLsFromPage(Document page) { - Elements elements=page.select("#post-list .thumb a,#pool-show .thumb a"); - List res=new ArrayList(elements.size()); + Elements elements = page.select("#post-list .thumb a,#pool-show .thumb a"); + List res = new ArrayList(elements.size()); - for(Element e:elements){ - res.add(e.absUrl("href")+"#"+e.child(0).attr("id").substring(1)); + if (page.getElementById("pool-show") != null) { + int index = 0; + + Element e = page.getElementById("paginator"); + if (e != null && (e = e.getElementsByClass("current").first()) != null) + index = (Integer.parseInt(e.text()) - 1) * POOL_IMAGES_PER_PAGE; + + for (Element e_ : elements) + res.add(e_.absUrl("href") + "#" + ++index); + + } else { + for (Element e : elements) + res.add(e.absUrl("href") + "#" + e.child(0).attr("id").substring(1)); } return res; } - + @Override public Document getNextPage(Document page) throws IOException { - for(Element e:page.select("#paginator a")){ - if(e.attr("rel").equals("next")) + for (Element e : page.select("#paginator a")) { + if (e.attr("rel").equals("next")) return Http.url(e.absUrl("href")).get(); } return null; } - + @Override public void downloadURL(final URL url, int index) { e621ThreadPool.addThread(new Thread(new Runnable() { public void run() { try { - Document page=Http.url(url).get(); + Document page = Http.url(url).get(); + Element e = page.getElementById("image"); + + if (e != null) + addURLToDownload(new URL(e.absUrl("src")), Utils.getConfigBoolean("download.save_order", true) ? url.getRef() + "-" : ""); + else if ((e = page.select(".content object>param[name=\"movie\"]").first()) != null) + addURLToDownload(new URL(e.absUrl("value")), Utils.getConfigBoolean("download.save_order", true) ? url.getRef() + "-" : ""); + else + Logger.getLogger(E621Ripper.class.getName()).log(Level.WARNING, "Unsupported media type - please report to program author: " + url.toString()); - addURLToDownload(new URL(page.getElementById("image").absUrl("src")),Utils.getConfigBoolean("download.save_order",true)?url.getRef()+"-":""); } catch (IOException ex) { Logger.getLogger(E621Ripper.class.getName()).log(Level.SEVERE, null, ex); } @@ -95,48 +113,55 @@ public class E621Ripper extends AbstractHTMLRipper{ })); } - private String getTerm(URL url) throws MalformedURLException{ - if(gidPattern==null) - gidPattern=Pattern.compile("^https?://(www\\.)?e621\\.net/post/index/[^/]+/([a-zA-Z0-9$_.+!*'(),%-]+)(/.*)?(#.*)?$"); - if(gidPatternPool==null) - gidPatternPool=Pattern.compile("^https?://(www\\.)?e621\\.net/pool/show/([a-zA-Z0-9$_.+!*'(),%-]+)(\\?.*)?(/.*)?(#.*)?$"); - - Matcher m = gidPattern.matcher(url.toExternalForm()); - if(m.matches()) - return m.group(2); + private String getTerm(URL url) throws MalformedURLException { + String query = url.getQuery(); - m = gidPatternPool.matcher(url.toExternalForm()); - if(m.matches()) - return m.group(2); + if (query != null) + return Utils.parseUrlQuery(query, "tags"); - throw new MalformedURLException("Expected e621.net URL format: e621.net/post/index/1/searchterm - got "+url+" instead"); - } - - @Override - public String getGID(URL url) throws MalformedURLException { - try { - String prefix=""; - if(url.getPath().startsWith("/pool/show/")) - prefix="pool_"; - - return Utils.filesystemSafe(prefix+new URI(getTerm(url)).getPath()); - } catch (URISyntaxException ex) { - Logger.getLogger(PahealRipper.class.getName()).log(Level.SEVERE, null, ex); + if (query == null) { + if ((query = url.getPath()).startsWith("/post/index/")) { + query = query.substring(12); + + int pos = query.indexOf('/'); + if (pos == -1) + return null; + + // skip page number + query = query.substring(pos + 1); + + if (query.endsWith("/")) + query = query.substring(0, query.length() - 1); + + try { + return URLDecoder.decode(query, "UTF-8"); + } catch (UnsupportedEncodingException e) { + // Shouldn't happen since UTF-8 is required to be supported + throw new RuntimeException(e); + } + + } else if (query.startsWith("/pool/show/")) { + query = query.substring(11); + + if (query.endsWith("/")) + query = query.substring(0, query.length() - 1); + + return query; + } } - throw new MalformedURLException("Expected e621.net URL format: e621.net/post/index/1/searchterm - got "+url+" instead"); + return null; } - + @Override - public URL sanitizeURL(URL url) throws MalformedURLException { - if(gidPattern2==null) - gidPattern2=Pattern.compile("^https?://(www\\.)?e621\\.net/post/search\\?tags=([a-zA-Z0-9$_.+!*'(),%-]+)(/.*)?(#.*)?$"); + public String getGID(URL url) throws MalformedURLException { + String prefix = ""; + if (url.getPath().startsWith("/pool/show/")) + prefix = "pool_"; + else + prefix = "term_"; - Matcher m = gidPattern2.matcher(url.toExternalForm()); - if(m.matches()) - return new URL("https://e621.net/post/index/1/"+m.group(2).replace("+","%20")); - - return url; + return Utils.filesystemSafe(prefix + getTerm(url)); } } \ No newline at end of file diff --git a/src/main/java/com/rarchives/ripme/utils/Utils.java b/src/main/java/com/rarchives/ripme/utils/Utils.java index 946fce54..b4e9a311 100644 --- a/src/main/java/com/rarchives/ripme/utils/Utils.java +++ b/src/main/java/com/rarchives/ripme/utils/Utils.java @@ -3,13 +3,16 @@ package com.rarchives.ripme.utils; import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.io.UnsupportedEncodingException; import java.lang.reflect.Constructor; import java.net.URISyntaxException; import java.net.URL; import java.net.URLDecoder; import java.util.ArrayList; import java.util.Enumeration; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.jar.JarEntry; import java.util.jar.JarFile; @@ -387,4 +390,70 @@ public class Utils { } return result; } + + /** + * Parses an URL query + * + * @param query + * The query part of an URL + * @return The map of all query parameters + */ + public static Map parseUrlQuery(String query) { + Map res = new HashMap(); + + if (query.equals("")) + return res; + + String[] parts = query.split("&"); + int pos; + + try { + for (String part : parts) { + if ((pos = part.indexOf('=')) >= 0) + res.put(URLDecoder.decode(part.substring(0, pos), "UTF-8"), + URLDecoder.decode(part.substring(pos + 1), "UTF-8")); + else + res.put(URLDecoder.decode(part, "UTF-8"), ""); + } + } catch (UnsupportedEncodingException e) { + // Shouldn't happen since UTF-8 is required to be supported + throw new RuntimeException(e); + } + + return res; + } + + /** + * Parses an URL query and returns the requested parameter's value + * + * @param query + * The query part of an URL + * @param key + * The key whose value is requested + * @return The associated value or null if key wasn't found + */ + public static String parseUrlQuery(String query, String key) { + if (query.equals("")) + return null; + + String[] parts = query.split("&"); + int pos; + + try { + for (String part : parts) { + if ((pos = part.indexOf('=')) >= 0) { + if (URLDecoder.decode(part.substring(0, pos), "UTF-8").equals(key)) + return URLDecoder.decode(part.substring(pos + 1), "UTF-8"); + + } else if (URLDecoder.decode(part, "UTF-8").equals(key)) { + return ""; + } + } + } catch (UnsupportedEncodingException e) { + // Shouldn't happen since UTF-8 is required to be supported + throw new RuntimeException(e); + } + + return null; + } } From 21476ec2eb01ac36552b2c2917566118d21e47df Mon Sep 17 00:00:00 2001 From: Sodazona Date: Sun, 15 Jan 2017 17:59:58 +0000 Subject: [PATCH 02/26] Added ripper for zizki.com --- .../ripme/ripper/rippers/ZizkiRipper.java | 125 ++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/ZizkiRipper.java diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ZizkiRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ZizkiRipper.java new file mode 100644 index 00000000..b200c413 --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ZizkiRipper.java @@ -0,0 +1,125 @@ +package com.rarchives.ripme.ripper.rippers; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.jsoup.Connection.Response; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; + +public class ZizkiRipper extends AbstractHTMLRipper { + + private Document albumDoc = null; + private Map cookies = new HashMap(); + + public ZizkiRipper(URL url) throws IOException { + super(url); + } + + @Override + public String getHost() { + return "zizki"; + } + @Override + public String getDomain() { + return "zizki.com"; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("^https?://(www\\.)?zizki\\.com/([a-zA-Z0-9\\-_]+).*$"); + Matcher m = p.matcher(url.toExternalForm()); + if (!m.matches()) { + throw new MalformedURLException("Expected URL format: http://www.zizki.com/author/albumname, got: " + url); + } + return m.group(m.groupCount()); + } + + @Override + public String getAlbumTitle(URL url) throws MalformedURLException { + try { + // Attempt to use album title as GID + Element titleElement = getFirstPage().select("meta[name=description]").first(); + String title = titleElement.attr("content"); + title = title.substring(title.lastIndexOf('/') + 1); + + Element authorSpan = getFirstPage().select("span[class=creator]").first(); + String author = authorSpan.select("a").first().text(); + logger.debug("Author: " + author); + return getHost() + "_" + author + "_" + title.trim(); + } catch (IOException e) { + // Fall back to default album naming convention + logger.info("Unable to find title at " + url); + } + return super.getAlbumTitle(url); + } + + @Override + public Document getFirstPage() throws IOException { + if (albumDoc == null) { + Response resp = Http.url(url).response(); + cookies.putAll(resp.cookies()); + albumDoc = resp.parse(); + } + return albumDoc; + } + + @Override + public List getURLsFromPage(Document page) { + List imageURLs = new ArrayList(); + // Page contains images + logger.info("Look for images."); + for (Element thumb : page.select("img")) { + logger.info("Img"); + if (super.isStopped()) break; + // Find thumbnail image source + String image = null; + String img_type = null; + String src = null; + if (thumb.hasAttr("typeof")) { + img_type = thumb.attr("typeof"); + if (img_type.equals("foaf:Image")) { + logger.debug("Found image with " + img_type); + if (thumb.parent() != null && + thumb.parent().parent() != null && + thumb.parent().parent().attr("class") != null && + thumb.parent().parent().attr("class").equals("aimage-center") + ) + { + src = thumb.attr("src"); + logger.debug("Found url with " + src); + if (!src.contains("zizki.com")) { + continue; + } else { + imageURLs.add(src.replace("/styles/medium/public/","/styles/large/public/")); + } + } + } + } + } + return imageURLs; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), cookies); + } + + @Override + public String getPrefix(int index) { + return String.format("%03d_", index); + } +} From 44d71e634008b462b70a6bcaa965740955f6aab5 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Tue, 21 Feb 2017 15:39:52 -0500 Subject: [PATCH 03/26] Fixed 8muses naming issue and removed catch for unthrown error --- .../rarchives/ripme/ripper/rippers/EightmusesRipper.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java index b85d948d..3c0f1d90 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java @@ -54,7 +54,8 @@ public class EightmusesRipper extends AbstractHTMLRipper { // Attempt to use album title as GID Element titleElement = getFirstPage().select("meta[name=description]").first(); String title = titleElement.attr("content"); - title = title.substring(title.lastIndexOf('/') + 1); + title = title.replace("A huge collection of free porn comics for adults. Read", ""); + title = title.replace("online for free at 8muses.com", ""); return getHost() + "_" + title.trim(); } catch (IOException e) { // Fall back to default album naming convention @@ -122,14 +123,10 @@ public class EightmusesRipper extends AbstractHTMLRipper { } try { logger.info("Retrieving full-size image location from " + parentHref); - Thread.sleep(1000); image = getFullSizeImage(parentHref); } catch (IOException e) { logger.error("Failed to get full-size image from " + parentHref); continue; - } catch (InterruptedException e) { - logger.error("Interrupted while getting full-size image from " + parentHref); - continue; } } if (!image.contains("8muses.com")) { From 5eaa8a7d8c2b2e00b5ed429f444ebc0668fce7d6 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Tue, 21 Feb 2017 17:14:59 -0500 Subject: [PATCH 04/26] Added Myhentaicomics Ripper --- .../ripper/rippers/MyhentaicomicsRipper.java | 200 ++++++++++++++++++ 1 file changed, 200 insertions(+) create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/MyhentaicomicsRipper.java diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MyhentaicomicsRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MyhentaicomicsRipper.java new file mode 100644 index 00000000..ad7f7cf5 --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MyhentaicomicsRipper.java @@ -0,0 +1,200 @@ +package com.rarchives.ripme.ripper.rippers; + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; +import com.rarchives.ripme.utils.Utils; +import java.io.File; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +public class MyhentaicomicsRipper extends AbstractHTMLRipper { + public static boolean isTag; + + public MyhentaicomicsRipper(URL url) throws IOException { + super(url); + } + + @Override + public String getHost() { + return "myhentaicomics"; + } + + @Override + public String getDomain() { + return "myhentaicomics.com"; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("^https?://myhentaicomics.com/index.php/([a-zA-Z0-9-]*)/?$"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + isTag = false; + return m.group(1); + } + + Pattern pa = Pattern.compile("^https?://myhentaicomics.com/index.php/search\\?q=([a-zA-Z0-9-]*)([a-zA-Z0-9=&]*)?$"); + Matcher ma = pa.matcher(url.toExternalForm()); + if (ma.matches()) { + isTag = true; + return ma.group(1); + } + + Pattern pat = Pattern.compile("^http://myhentaicomics.com/index.php/tag/([0-9]*)/?([a-zA-Z%0-9+\\?=:]*)?$"); + Matcher mat = pat.matcher(url.toExternalForm()); + if (mat.matches()) { + isTag = true; + return mat.group(1); + } + + throw new MalformedURLException("Expected myhentaicomics.com URL format: " + + "myhentaicomics.com/index.php/albumName - got " + url + " instead"); + } + + @Override + public Document getFirstPage() throws IOException { + // "url" is an instance field of the superclass + return Http.url(url).get(); + } + + @Override + public Document getNextPage(Document doc) throws IOException { + // Find next page + String nextUrl = ""; + Element elem = doc.select("a.ui-icon-right").first(); + String nextPage = elem.attr("href"); + Pattern p = Pattern.compile("/index.php/[a-zA-Z0-9_-]*\\?page=\\d"); + Matcher m = p.matcher(nextPage); + if (m.matches()) { + nextUrl = "http://myhentaicomics.com" + m.group(0); + } + if (nextUrl == "") { + throw new IOException("No more pages"); + } + // Sleep for half a sec to avoid getting IP banned + sleep(500); + return Http.url(nextUrl).get(); + } + + // This replaces getNextPage when downloading from searchs and tags + public List getNextAlbumPage(String pageUrl) { + List albumPagesList = new ArrayList(); + int pageNumber = 1; + albumPagesList.add("http://myhentaicomics.com/index.php/" + pageUrl.split("\\?")[0] + "?page=" + Integer.toString(pageNumber)); + while(true) { + String urlToGet = "http://myhentaicomics.com/index.php/" + pageUrl.split("\\?")[0] + "?page=" + Integer.toString(pageNumber); + Document nextAlbumPage; + try { + logger.info("Grabbing " + urlToGet); + nextAlbumPage = Http.url(urlToGet).get(); + } catch(IOException e){ + logger.warn("Failed to log link in Jsoup"); + nextAlbumPage = null; + e.printStackTrace(); + } + Element elem = nextAlbumPage.select("a.ui-icon-right").first(); + String nextPage = elem.attr("href"); + pageNumber = pageNumber + 1; + if(nextPage == ""){ + logger.info("Got " + pageNumber + " pages"); + break; + } + else { + logger.info(nextPage); + albumPagesList.add(nextPage); + logger.info("Adding " + nextPage); + } + } + return albumPagesList; + } + + + @Override + public List getURLsFromPage(Document doc) { + List result = new ArrayList(); + List pagesToRip; + // Checks if this is a comic page or a page of albums + if (doc.toString().contains("class=\"g-item g-album\"")) { + for (Element elem : doc.select("li.g-album > a")) { + String link = elem.attr("href"); + logger.info("Grabbing album " + link); + pagesToRip = getNextAlbumPage(link); + logger.info(pagesToRip); + for (String element : pagesToRip) { + Document album_doc; + try { + logger.info("grabbing " + element + " with jsoup"); + boolean startsWithhttp = element.startsWith("http"); + if (startsWithhttp == false) { + album_doc = Http.url("http://myhentaicomics.com/" + element).get(); + } + else { + album_doc = Http.url(element).get(); + } + } catch(IOException e){ + logger.warn("Failed to log link in Jsoup"); + album_doc = null; + e.printStackTrace(); + } + for (Element el :album_doc.select("img")) { + String imageSource = el.attr("src"); + // This bool is here so we don't try and download the site logo + boolean b = imageSource.startsWith("http"); + if (b == false) { + // We replace thumbs with resizes so we can the full sized images + imageSource = imageSource.replace("thumbs", "resizes"); + result.add("http://myhentaicomics.com/" + imageSource); + } + } + } + + } + } + else { + for (Element el : doc.select("img")) { + String imageSource = el.attr("src"); + // This bool is here so we don't try and download the site logo + boolean b = imageSource.startsWith("http"); + if (b == false) { + // We replace thumbs with resizes so we can the full sized images + imageSource = imageSource.replace("thumbs", "resizes"); + result.add("http://myhentaicomics.com/" + imageSource); + } + } + } + return result; + } + + @Override + public void downloadURL(URL url, int index) { + String url_string = url.toExternalForm(); + url_string = url_string.replace("%20", "_"); + url_string = url_string.replace("%27", ""); + url_string = url_string.replace("%28", "_"); + url_string = url_string.replace("%29", "_") + url_string = url_string.replace("%2C", "_"); + if (isTag == true) { + logger.info("Downloading from a tag or search"); + addURLToDownload(url, getPrefix(index), url_string.split("/")[6]); + } + else { + addURLToDownload(url, getPrefix(index)); + } + } + + +} From fd15ab8673e3d4f37bda7fe1697ed56da605c152 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Tue, 21 Feb 2017 17:29:34 -0500 Subject: [PATCH 05/26] added missing ; --- .../rarchives/ripme/ripper/rippers/MyhentaicomicsRipper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MyhentaicomicsRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MyhentaicomicsRipper.java index ad7f7cf5..950d8da4 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/MyhentaicomicsRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MyhentaicomicsRipper.java @@ -185,7 +185,7 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper { url_string = url_string.replace("%20", "_"); url_string = url_string.replace("%27", ""); url_string = url_string.replace("%28", "_"); - url_string = url_string.replace("%29", "_") + url_string = url_string.replace("%29", "_"); url_string = url_string.replace("%2C", "_"); if (isTag == true) { logger.info("Downloading from a tag or search"); From 334a124b9f840759e4c74c684f9cd594b383ee94 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Wed, 22 Feb 2017 01:23:53 -0500 Subject: [PATCH 06/26] Added missing subdomain for 4chan (#452) --- .../java/com/rarchives/ripme/ripper/rippers/ChanRipper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java index 1c00583c..b9289f28 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java @@ -19,7 +19,7 @@ import com.rarchives.ripme.utils.Http; public class ChanRipper extends AbstractHTMLRipper { public static List explicit_domains = Arrays.asList( - new ChanSite(Arrays.asList("boards.4chan.org"), Arrays.asList("4cdn.org", "is.4chan.org")), + new ChanSite(Arrays.asList("boards.4chan.org"), Arrays.asList("4cdn.org", "is.4chan.org", "is2.4chan.org")), new ChanSite(Arrays.asList("archive.moe"), Arrays.asList("data.archive.moe")), new ChanSite(Arrays.asList("4archive.org"), Arrays.asList("imgur.com")), new ChanSite(Arrays.asList("archive.4plebs.org"), Arrays.asList("img.4plebs.org")), From 06e88efedf060354671e3801686d1f2c2fa0860a Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Wed, 22 Feb 2017 01:24:52 -0500 Subject: [PATCH 07/26] removed the unused g subdomain from the ehentai ripper (#453) --- .../ripme/ripper/rippers/EHentaiRipper.java | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java index 44790487..a622d832 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java @@ -38,7 +38,7 @@ public class EHentaiRipper extends AbstractHTMLRipper { // Current HTML document private Document albumDoc = null; - + private static final Map cookies = new HashMap(); static { cookies.put("nw", "1"); @@ -53,10 +53,10 @@ public class EHentaiRipper extends AbstractHTMLRipper { public String getHost() { return "e-hentai"; } - + @Override public String getDomain() { - return "g.e-hentai.org"; + return "e-hentai.org"; } public String getAlbumTitle(URL url) throws MalformedURLException { @@ -79,18 +79,18 @@ public class EHentaiRipper extends AbstractHTMLRipper { Pattern p; Matcher m; - p = Pattern.compile("^.*g\\.e-hentai\\.org/g/([0-9]+)/([a-fA-F0-9]+)/$"); + p = Pattern.compile("^https?://e-hentai\\.org/g/([0-9]+)/([a-fA-F0-9]+)/$"); m = p.matcher(url.toExternalForm()); if (m.matches()) { return m.group(1) + "-" + m.group(2); } throw new MalformedURLException( - "Expected g.e-hentai.org gallery format: " - + "http://g.e-hentai.org/g/####/####/" + "Expected e-hentai.org gallery format: " + + "http://e-hentai.org/g/####/####/" + " Got: " + url); } - + /** * Attempts to get page, checks for IP ban, waits. * @param url @@ -185,7 +185,7 @@ public class EHentaiRipper extends AbstractHTMLRipper { /** * Helper class to find and download images found on "image" pages - * + * * Handles case when site has IP-banned the user. */ private class EHentaiImageThread extends Thread { @@ -204,7 +204,7 @@ public class EHentaiRipper extends AbstractHTMLRipper { public void run() { fetchImage(); } - + private void fetchImage() { try { Document doc = getPageWithRetries(this.url); @@ -246,4 +246,4 @@ public class EHentaiRipper extends AbstractHTMLRipper { } } } -} \ No newline at end of file +} From ab912542268b56ff5bc70e7ab02b2ba152b64a99 Mon Sep 17 00:00:00 2001 From: kas-luthor Date: Wed, 22 Feb 2017 10:31:55 +0100 Subject: [PATCH 08/26] Code style changes --- .../ripme/ripper/rippers/E621Ripper.java | 112 ++++++++------- .../java/com/rarchives/ripme/utils/Utils.java | 133 +++++++++--------- 2 files changed, 131 insertions(+), 114 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/E621Ripper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/E621Ripper.java index 6f6731d9..e45d3980 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/E621Ripper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/E621Ripper.java @@ -26,71 +26,79 @@ import org.jsoup.select.Elements; * * @author */ -public class E621Ripper extends AbstractHTMLRipper{ -public static final int POOL_IMAGES_PER_PAGE = 24; - +public class E621Ripper extends AbstractHTMLRipper { + public static final int POOL_IMAGES_PER_PAGE = 24; + private DownloadThreadPool e621ThreadPool = new DownloadThreadPool("e621"); - + public E621Ripper(URL url) throws IOException { super(url); } - + @Override public DownloadThreadPool getThreadPool() { return e621ThreadPool; } - + @Override public String getDomain() { return "e621.net"; } - + @Override public String getHost() { return "e621"; } - + @Override public Document getFirstPage() throws IOException { - if (url.getPath().startsWith("/pool/show/")) + if (url.getPath().startsWith("/pool/show/")) { return Http.url("https://e621.net/pool/show/" + getTerm(url)).get(); - else + } else { return Http.url("https://e621.net/post/index/1/" + getTerm(url)).get(); + } } - + @Override public List getURLsFromPage(Document page) { Elements elements = page.select("#post-list .thumb a,#pool-show .thumb a"); List res = new ArrayList(elements.size()); - + if (page.getElementById("pool-show") != null) { int index = 0; - + Element e = page.getElementById("paginator"); - if (e != null && (e = e.getElementsByClass("current").first()) != null) - index = (Integer.parseInt(e.text()) - 1) * POOL_IMAGES_PER_PAGE; - - for (Element e_ : elements) + if (e != null) { + e = e.getElementsByClass("current").first(); + if (e != null) { + index = (Integer.parseInt(e.text()) - 1) * POOL_IMAGES_PER_PAGE; + } + } + + for (Element e_ : elements) { res.add(e_.absUrl("href") + "#" + ++index); - + } + } else { - for (Element e : elements) + for (Element e : elements) { res.add(e.absUrl("href") + "#" + e.child(0).attr("id").substring(1)); + } } - + return res; } - + @Override public Document getNextPage(Document page) throws IOException { for (Element e : page.select("#paginator a")) { - if (e.attr("rel").equals("next")) + if (e.attr("rel").equals("next")) { return Http.url(e.absUrl("href")).get(); + } } - + return null; } - + @Override public void downloadURL(final URL url, int index) { e621ThreadPool.addThread(new Thread(new Runnable() { @@ -98,70 +106,76 @@ public static final int POOL_IMAGES_PER_PAGE = 24; try { Document page = Http.url(url).get(); Element e = page.getElementById("image"); - - if (e != null) + + if (e != null) { addURLToDownload(new URL(e.absUrl("src")), Utils.getConfigBoolean("download.save_order", true) ? url.getRef() + "-" : ""); - else if ((e = page.select(".content object>param[name=\"movie\"]").first()) != null) + } else if ((e = page.select(".content object>param[name=\"movie\"]").first()) != null) { addURLToDownload(new URL(e.absUrl("value")), Utils.getConfigBoolean("download.save_order", true) ? url.getRef() + "-" : ""); - else + } else { Logger.getLogger(E621Ripper.class.getName()).log(Level.WARNING, "Unsupported media type - please report to program author: " + url.toString()); - + } + } catch (IOException ex) { Logger.getLogger(E621Ripper.class.getName()).log(Level.SEVERE, null, ex); } } })); } - + private String getTerm(URL url) throws MalformedURLException { String query = url.getQuery(); - - if (query != null) + + if (query != null) { return Utils.parseUrlQuery(query, "tags"); - + } + if (query == null) { if ((query = url.getPath()).startsWith("/post/index/")) { query = query.substring(12); - + int pos = query.indexOf('/'); - if (pos == -1) + if (pos == -1) { return null; - + } + // skip page number query = query.substring(pos + 1); - - if (query.endsWith("/")) + + if (query.endsWith("/")) { query = query.substring(0, query.length() - 1); - + } + try { return URLDecoder.decode(query, "UTF-8"); } catch (UnsupportedEncodingException e) { // Shouldn't happen since UTF-8 is required to be supported throw new RuntimeException(e); } - + } else if (query.startsWith("/pool/show/")) { query = query.substring(11); - - if (query.endsWith("/")) + + if (query.endsWith("/")) { query = query.substring(0, query.length() - 1); - + } + return query; } } - + return null; } - + @Override public String getGID(URL url) throws MalformedURLException { String prefix = ""; - if (url.getPath().startsWith("/pool/show/")) + if (url.getPath().startsWith("/pool/show/")) { prefix = "pool_"; - else + } else { prefix = "term_"; - + } + return Utils.filesystemSafe(prefix + getTerm(url)); } - + } \ No newline at end of file diff --git a/src/main/java/com/rarchives/ripme/utils/Utils.java b/src/main/java/com/rarchives/ripme/utils/Utils.java index b4e9a311..f3957360 100644 --- a/src/main/java/com/rarchives/ripme/utils/Utils.java +++ b/src/main/java/com/rarchives/ripme/utils/Utils.java @@ -390,70 +390,73 @@ public class Utils { } return result; } - + /** - * Parses an URL query - * - * @param query - * The query part of an URL - * @return The map of all query parameters - */ - public static Map parseUrlQuery(String query) { - Map res = new HashMap(); - - if (query.equals("")) - return res; - - String[] parts = query.split("&"); - int pos; - - try { - for (String part : parts) { - if ((pos = part.indexOf('=')) >= 0) - res.put(URLDecoder.decode(part.substring(0, pos), "UTF-8"), - URLDecoder.decode(part.substring(pos + 1), "UTF-8")); - else - res.put(URLDecoder.decode(part, "UTF-8"), ""); - } - } catch (UnsupportedEncodingException e) { - // Shouldn't happen since UTF-8 is required to be supported - throw new RuntimeException(e); - } - - return res; - } - - /** - * Parses an URL query and returns the requested parameter's value - * - * @param query - * The query part of an URL - * @param key - * The key whose value is requested - * @return The associated value or null if key wasn't found - */ - public static String parseUrlQuery(String query, String key) { - if (query.equals("")) - return null; - - String[] parts = query.split("&"); - int pos; - - try { - for (String part : parts) { - if ((pos = part.indexOf('=')) >= 0) { - if (URLDecoder.decode(part.substring(0, pos), "UTF-8").equals(key)) - return URLDecoder.decode(part.substring(pos + 1), "UTF-8"); - - } else if (URLDecoder.decode(part, "UTF-8").equals(key)) { - return ""; - } - } - } catch (UnsupportedEncodingException e) { - // Shouldn't happen since UTF-8 is required to be supported - throw new RuntimeException(e); - } - - return null; - } + * Parses an URL query + * + * @param query + * The query part of an URL + * @return The map of all query parameters + */ + public static Map parseUrlQuery(String query) { + Map res = new HashMap(); + + if (query.equals("")){ + return res; + } + + String[] parts = query.split("&"); + int pos; + + try { + for (String part : parts) { + if ((pos = part.indexOf('=')) >= 0){ + res.put(URLDecoder.decode(part.substring(0, pos), "UTF-8"), URLDecoder.decode(part.substring(pos + 1), "UTF-8")); + }else{ + res.put(URLDecoder.decode(part, "UTF-8"), ""); + } + } + } catch (UnsupportedEncodingException e) { + // Shouldn't happen since UTF-8 is required to be supported + throw new RuntimeException(e); + } + + return res; + } + + /** + * Parses an URL query and returns the requested parameter's value + * + * @param query + * The query part of an URL + * @param key + * The key whose value is requested + * @return The associated value or null if key wasn't found + */ + public static String parseUrlQuery(String query, String key) { + if (query.equals("")){ + return null; + } + + String[] parts = query.split("&"); + int pos; + + try { + for (String part : parts) { + if ((pos = part.indexOf('=')) >= 0) { + if (URLDecoder.decode(part.substring(0, pos), "UTF-8").equals(key)){ + return URLDecoder.decode(part.substring(pos + 1), "UTF-8"); + } + + } else if (URLDecoder.decode(part, "UTF-8").equals(key)) { + return ""; + } + } + } catch (UnsupportedEncodingException e) { + // Shouldn't happen since UTF-8 is required to be supported + throw new RuntimeException(e); + } + + return null; + } } From 94738b16474bb6601018b292b65749038f9560ac Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Wed, 22 Feb 2017 09:58:52 -0500 Subject: [PATCH 09/26] Added ripper for sinner comics --- .../ripper/rippers/SinnercomicsRipper.java | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/SinnercomicsRipper.java diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/SinnercomicsRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/SinnercomicsRipper.java new file mode 100644 index 00000000..645631f5 --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/SinnercomicsRipper.java @@ -0,0 +1,88 @@ +package com.rarchives.ripme.ripper.rippers; + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; +import com.rarchives.ripme.utils.Utils; +import java.io.File; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +public class SinnercomicsRipper extends AbstractHTMLRipper { + + public SinnercomicsRipper(URL url) throws IOException { + super(url); + } + + @Override + public String getHost() { + return "sinnercomics"; + } + + @Override + public String getDomain() { + return "sinnercomics.com"; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("^https?://sinnercomics.com/comic/([a-zA-Z0-9-]*)/?$"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1); + } + throw new MalformedURLException("Expected sinnercomics.com URL format: " + + "sinnercomics.com/comic/albumName - got " + url + " instead"); + } + + @Override + public Document getFirstPage() throws IOException { + // "url" is an instance field of the superclass + return Http.url(url).get(); + } + + @Override + public Document getNextPage(Document doc) throws IOException { + // Find next page + String nextUrl = ""; + // We use comic-nav-next to the find the next page + Element elem = doc.select("a.comic-nav-next").first(); + if (elem == null) { + throw new IOException("No more pages"); + } + String nextPage = elem.attr("href"); + // Wait half a sec to avoid IP bans + sleep(500); + return Http.url(nextPage).get(); + } + + @Override + public List getURLsFromPage(Document doc) { + List result = new ArrayList(); + for (Element el : doc.select("meta[property=og:image]")) { + String imageSource = el.attr("content"); + result.add(imageSource); + } + return result; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } + + +} From 63801db2580e7a596bb94ae6faeb12b8ce234e8e Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Tue, 28 Feb 2017 02:40:16 -0500 Subject: [PATCH 10/26] Changed ripper to follow new url format --- .../com/rarchives/ripme/ripper/rippers/EightmusesRipper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java index 3c0f1d90..7b201b24 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java @@ -40,7 +40,7 @@ public class EightmusesRipper extends AbstractHTMLRipper { @Override public String getGID(URL url) throws MalformedURLException { - Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/index/category/([a-zA-Z0-9\\-_]+).*$"); + Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/comix/album/([a-zA-Z0-9\\-_]+).*$"); Matcher m = p.matcher(url.toExternalForm()); if (!m.matches()) { throw new MalformedURLException("Expected URL format: http://www.8muses.com/index/category/albumname, got: " + url); From 602c5ce7c6ce0950134a60df74793345d584a4ac Mon Sep 17 00:00:00 2001 From: metaprime Date: Wed, 1 Mar 2017 02:18:13 -0800 Subject: [PATCH 11/26] 1.4.3: Add missing subdomain for 4chan; fix ehentai, 8muses; add zizki ripper. --- pom.xml | 2 +- ripme.json | 3 ++- src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 0bf086a9..e829c3a5 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.4.2 + 1.4.3 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index faaa049c..4c32bb05 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,7 @@ { - "latestVersion" : "1.4.2", + "latestVersion" : "1.4.3", "changeList" : [ + "1.4.3: Add missing subdomain for 4chan; fix ehentai, 8muses; add zizki ripper.", "1.4.2: Added nhentai ripper.", "1.4.1: Fixed Imgbox: correctly downloads full-size images.", "1.4.0: Fixed update mechanism. Some improvements to Imgur, etc.", diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index cc912805..1d944ab6 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.4.2"; + private static final String DEFAULT_VERSION = "1.4.3"; private static final String updateJsonURL = "https://raw.githubusercontent.com/4pr0n/ripme/master/ripme.json"; private static final String mainFileName = "ripme.jar"; private static final String updateFileName = "ripme.jar.update"; From 65df1824a94bf6b279166548357ffbe4b2c11eb2 Mon Sep 17 00:00:00 2001 From: metaprime Date: Thu, 2 Mar 2017 01:30:05 -0800 Subject: [PATCH 12/26] Ignore txt files. --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index da543d4e..b34556d5 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ history.json *.iml .settings/ .classpath +*.txt From 9889daab445dfbfedee4b081cbe60f099dd50628 Mon Sep 17 00:00:00 2001 From: metaprime Date: Thu, 2 Mar 2017 01:44:29 -0800 Subject: [PATCH 13/26] 1.4.4: Added SinnerComics, MyHentaiComics rippers; improve E621 ripper. --- pom.xml | 2 +- ripme.json | 3 ++- src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index e829c3a5..eda1638f 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.4.3 + 1.4.4 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index 4c32bb05..224087da 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,7 @@ { - "latestVersion" : "1.4.3", + "latestVersion" : "1.4.4", "changeList" : [ + "1.4.4: Added SinnerComics, MyHentaiComics rippers; improve E621 ripper.", "1.4.3: Add missing subdomain for 4chan; fix ehentai, 8muses; add zizki ripper.", "1.4.2: Added nhentai ripper.", "1.4.1: Fixed Imgbox: correctly downloads full-size images.", diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index 1d944ab6..b78fc6b3 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.4.3"; + private static final String DEFAULT_VERSION = "1.4.4"; private static final String updateJsonURL = "https://raw.githubusercontent.com/4pr0n/ripme/master/ripme.json"; private static final String mainFileName = "ripme.jar"; private static final String updateFileName = "ripme.jar.update"; From b3873b26b4ccc22bf91949803dffaae9d1ed971f Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Tue, 14 Mar 2017 17:16:33 -0400 Subject: [PATCH 14/26] SinnerComics: Added work around for naming bug (#472) --- .../com/rarchives/ripme/ripper/rippers/SinnercomicsRipper.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/SinnercomicsRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/SinnercomicsRipper.java index 645631f5..75062897 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/SinnercomicsRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/SinnercomicsRipper.java @@ -74,6 +74,7 @@ public class SinnercomicsRipper extends AbstractHTMLRipper { List result = new ArrayList(); for (Element el : doc.select("meta[property=og:image]")) { String imageSource = el.attr("content"); + imageSource = imageSource.replace(" alt=", ""); result.add(imageSource); } return result; From 19a88b9d3304ec5448199c8ae5d1b396800c44c9 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 19 Mar 2017 13:35:24 -0400 Subject: [PATCH 15/26] Added xhamsters new cdn domain --- .../java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java index ac04b64d..c5f0cab4 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java @@ -48,6 +48,7 @@ public class XhamsterRipper extends AlbumRipper { image = image.replaceAll( "https://upt.xhcdn\\.", "http://up.xhamster."); + image = image.replaceAll("ept.xhcdn", "ep.xhamster"); image = image.replaceAll( "_160\\.", "_1000."); From 141478213cef1e645d486035eadb19fbd3fcfb29 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Sun, 19 Mar 2017 19:50:29 -0400 Subject: [PATCH 16/26] eroshare ripper now grabs album name --- .../ripme/ripper/rippers/EroShareRipper.java | 38 +++++++++++++------ 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java index 058a368c..97ab2815 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java @@ -29,11 +29,11 @@ import com.rarchives.ripme.utils.Http; * @author losipher */ public class EroShareRipper extends AbstractHTMLRipper { - + public EroShareRipper (URL url) throws IOException { super(url); } - + @Override public String getDomain() { return "eroshare.com"; @@ -43,12 +43,28 @@ public class EroShareRipper extends AbstractHTMLRipper { public String getHost() { return "eroshare"; } - + @Override public void downloadURL(URL url, int index){ addURLToDownload(url); } - + + @Override + public String getAlbumTitle(URL url) throws MalformedURLException { + try { + // Attempt to use album title as GID + Element titleElement = getFirstPage().select("meta[property=og:title]").first(); + String title = titleElement.attr("content"); + title = title.substring(title.lastIndexOf('/') + 1); + return getHost() + "_" + getGID(url) + "_" + title.trim(); + } catch (IOException e) { + // Fall back to default album naming convention + logger.info("Unable to find title at " + url); + } + return super.getAlbumTitle(url); + } + + @Override public List getURLsFromPage(Document doc){ List URLs = new ArrayList(); @@ -70,10 +86,10 @@ public class EroShareRipper extends AbstractHTMLRipper { URLs.add(videoURL); } } - + return URLs; } - + @Override public Document getFirstPage() throws IOException { Response resp = Http.url(this.url) @@ -81,10 +97,10 @@ public class EroShareRipper extends AbstractHTMLRipper { .response(); Document doc = resp.parse(); - + return doc; } - + @Override public String getGID(URL url) throws MalformedURLException { Pattern p = Pattern.compile("^https?://[w.]*eroshare.com/([a-zA-Z0-9\\-_]+)/?$"); @@ -94,9 +110,9 @@ public class EroShareRipper extends AbstractHTMLRipper { } throw new MalformedURLException("eroshare album not found in " + url + ", expected https://eroshare.com/album"); } - + public static List getURLs(URL url) throws IOException{ - + Response resp = Http.url(url) .ignoreContentType() .response(); @@ -122,7 +138,7 @@ public class EroShareRipper extends AbstractHTMLRipper { URLs.add(new URL(videoURL)); } } - + return URLs; } } From 464085bbc0af543f0544ec8b25a14f64c054d3bb Mon Sep 17 00:00:00 2001 From: MetaPrime Date: Thu, 23 Mar 2017 00:44:36 -0700 Subject: [PATCH 17/26] 1.4.5: SinnerComics: Added work around for naming bug --- pom.xml | 2 +- ripme.json | 3 ++- src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index eda1638f..00305408 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.4.4 + 1.4.5 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index 224087da..c049919c 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,7 @@ { - "latestVersion" : "1.4.4", + "latestVersion" : "1.4.5", "changeList" : [ + "1.4.5: SinnerComics: Added work around for naming bug", "1.4.4: Added SinnerComics, MyHentaiComics rippers; improve E621 ripper.", "1.4.3: Add missing subdomain for 4chan; fix ehentai, 8muses; add zizki ripper.", "1.4.2: Added nhentai ripper.", diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index b78fc6b3..eb7d3ef1 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.4.4"; + private static final String DEFAULT_VERSION = "1.4.5"; private static final String updateJsonURL = "https://raw.githubusercontent.com/4pr0n/ripme/master/ripme.json"; private static final String mainFileName = "ripme.jar"; private static final String updateFileName = "ripme.jar.update"; From 2ff45e8696878783072f79dad8988d18ea8eb749 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Thu, 23 Mar 2017 05:34:37 -0400 Subject: [PATCH 18/26] Changed imgur title grabbing to be less reliant on site style (#477) --- .../ripme/ripper/rippers/ImgurRipper.java | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java index 89c5f935..e59826cb 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java @@ -28,7 +28,7 @@ public class ImgurRipper extends AlbumRipper { HOST = "imgur"; private final int SLEEP_BETWEEN_ALBUMS; - + private Document albumDoc; static enum ALBUM_TYPE { @@ -104,12 +104,10 @@ public class ImgurRipper extends AlbumRipper { */ String title = null; - elems = albumDoc.select(".post-title"); - if (elems.size() > 0) { - Element postTitle = elems.get(0); - if (postTitle != null) { - title = postTitle.text(); - } + logger.info("Trying to get album title"); + elems = albumDoc.select("meta[property=og:title]"); + if (elems!=null) { + title = elems.attr("content"); } String albumTitle = "imgur_"; @@ -138,18 +136,25 @@ public class ImgurRipper extends AlbumRipper { case ALBUM: // Fall-through case USER_ALBUM: + logger.info("Album type is USER_ALBUM"); + // Don't call getAlbumTitle(this.url) with this + // as it seems to cause the album to be downloaded to a subdir. ripAlbum(this.url); break; case SERIES_OF_IMAGES: + logger.info("Album type is SERIES_OF_IMAGES"); ripAlbum(this.url); break; case USER: + logger.info("Album type is USER"); ripUserAccount(url); break; case SUBREDDIT: + logger.info("Album type is SUBREDDIT"); ripSubreddit(url); break; case USER_IMAGES: + logger.info("Album type is USER_IMAGES"); ripUserImages(url); break; } @@ -338,7 +343,7 @@ public class ImgurRipper extends AlbumRipper { } return imgurAlbum; } - + /** * Rips all albums in an imgur user's account. * @param url @@ -366,7 +371,7 @@ public class ImgurRipper extends AlbumRipper { } } } - + private void ripUserImages(URL url) throws IOException { int page = 0; int imagesFound = 0; int imagesTotal = 0; String jsonUrl = url.toExternalForm().replace("/all", "/ajax/images"); @@ -404,7 +409,7 @@ public class ImgurRipper extends AlbumRipper { } } } - + private void ripSubreddit(URL url) throws IOException { int page = 0; while (true) { From bacc69dff51df9ffeeedc741e098abd3a1bcb4dc Mon Sep 17 00:00:00 2001 From: MetaPrime Date: Thu, 23 Mar 2017 02:38:33 -0700 Subject: [PATCH 19/26] 1.4.6: Eroshare: get album names; Imgur: improve grabbing album name. --- pom.xml | 2 +- ripme.json | 3 ++- src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 00305408..51fd66f9 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.4.5 + 1.4.6 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index c049919c..951ad1a1 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,7 @@ { - "latestVersion" : "1.4.5", + "latestVersion" : "1.4.6", "changeList" : [ + "1.4.6: Eroshare: get album names; Imgur: improve grabbing album name.", "1.4.5: SinnerComics: Added work around for naming bug", "1.4.4: Added SinnerComics, MyHentaiComics rippers; improve E621 ripper.", "1.4.3: Add missing subdomain for 4chan; fix ehentai, 8muses; add zizki ripper.", diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index eb7d3ef1..fec1fecd 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.4.5"; + private static final String DEFAULT_VERSION = "1.4.6"; private static final String updateJsonURL = "https://raw.githubusercontent.com/4pr0n/ripme/master/ripme.json"; private static final String mainFileName = "ripme.jar"; private static final String updateFileName = "ripme.jar.update"; From 23e464d0c102c9fc2447e840a770028447e1d606 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Fri, 24 Mar 2017 17:58:17 -0400 Subject: [PATCH 20/26] improved xhamster cdn replacing regex --- .../java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java index c5f0cab4..20fdb955 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java @@ -48,7 +48,7 @@ public class XhamsterRipper extends AlbumRipper { image = image.replaceAll( "https://upt.xhcdn\\.", "http://up.xhamster."); - image = image.replaceAll("ept.xhcdn", "ep.xhamster"); + image = image.replaceAll("ept\\.xhcdn", "ep.xhamster"); image = image.replaceAll( "_160\\.", "_1000."); From e71f2d392f2a506555517339a35f01b925756f5f Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Tue, 28 Mar 2017 09:29:27 -0400 Subject: [PATCH 21/26] Added support thechive.com --- .../ripme/ripper/rippers/ThechiveRipper.java | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/ThechiveRipper.java diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ThechiveRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ThechiveRipper.java new file mode 100644 index 00000000..999eb6f8 --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ThechiveRipper.java @@ -0,0 +1,78 @@ + +package com.rarchives.ripme.ripper.rippers; + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; +import com.rarchives.ripme.utils.Utils; +import java.io.File; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +public class ThechiveRipper extends AbstractHTMLRipper { + public static boolean isTag; + + public ThechiveRipper(URL url) throws IOException { + super(url); + } + + @Override + public String getHost() { + return "thechive"; + } + + @Override + public String getDomain() { + return "thechive.com"; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("^https?://thechive.com/[0-9]*/[0-9]*/[0-9]*/([a-zA-Z0-9_\\-]*)/?$"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + isTag = false; + return m.group(1); + } + throw new MalformedURLException("Expected thechive.com URL format: " + + "thechive.com/YEAR/MONTH/DAY/POSTTITLE/ - got " + url + " instead"); + } + + @Override + public Document getFirstPage() throws IOException { + // "url" is an instance field of the superclass + return Http.url(url).get(); + } + + @Override + public List getURLsFromPage(Document doc) { + List result = new ArrayList(); + for (Element el : doc.select("img.attachment-gallery-item-full")) { + String imageSource = el.attr("src"); + // We replace thumbs with resizes so we can the full sized images + imageSource = imageSource.replace("thumbs", "resizes"); + result.add(imageSource); + } + return result; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } + + +} From ef206fb7a6ab5340f4eb67470c48ea6291e6f9f5 Mon Sep 17 00:00:00 2001 From: metaprime Date: Mon, 19 Dec 2016 23:06:35 -0800 Subject: [PATCH 22/26] Fix #370: Fix NewsfilterRipper. --- .gitignore | 1 + .../ripper/rippers/NewsfilterRipper.java | 68 ++++++++----------- 2 files changed, 29 insertions(+), 40 deletions(-) diff --git a/.gitignore b/.gitignore index b34556d5..63b6da1b 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ history.json .settings/ .classpath *.txt +bin/ diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java index 3eb1b43d..1c7cf8dc 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java @@ -1,19 +1,19 @@ package com.rarchives.ripme.ripper.rippers; - -import com.rarchives.ripme.ripper.AlbumRipper; -import org.jsoup.Connection; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; - import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.jsoup.Connection; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import com.rarchives.ripme.ripper.AlbumRipper; + public class NewsfilterRipper extends AlbumRipper { private static final String HOST = "newsfilter"; @@ -33,7 +33,7 @@ public class NewsfilterRipper extends AlbumRipper { public URL sanitizeURL(URL url) throws MalformedURLException { String u = url.toExternalForm(); if (u.indexOf('#') >= 0) { - u = u.substring(0, u.indexOf('#')); + u = u.substring(0, u.indexOf('#')); } u = u.replace("https?://m\\.newsfilter\\.org", "http://newsfilter.org"); return new URL(u); @@ -41,39 +41,28 @@ public class NewsfilterRipper extends AlbumRipper { @Override public void rip() throws IOException { - String gid = getGID(this.url), - theurl = "http://newsfilter.org/gallery/" + gid; - - Connection.Response resp = null; + String gid = getGID(this.url); + String theurl = "http://newsfilter.org/gallery/" + gid; logger.info("Loading " + theurl); - resp = Jsoup.connect(theurl) - .timeout(5000) - .referrer("") - .userAgent(USER_AGENT) - .method(Connection.Method.GET) - .execute(); + Connection.Response resp = Jsoup.connect(theurl) + .timeout(5000) + .referrer("") + .userAgent(USER_AGENT) + .method(Connection.Method.GET) + .execute(); Document doc = resp.parse(); - //Element gallery = doc.getElementById("thegalmain"); - //Elements piclinks = gallery.getElementsByAttributeValue("itemprop","contentURL"); - Pattern pat = Pattern.compile(gid+"/\\d+"); - Elements piclinks = doc.getElementsByAttributeValueMatching("href", pat); - for (Element picelem : piclinks) { - String picurl = "http://newsfilter.org"+picelem.attr("href"); - logger.info("Getting to picture page: "+picurl); - resp = Jsoup.connect(picurl) - .timeout(5000) - .referrer(theurl) - .userAgent(USER_AGENT) - .method(Connection.Method.GET) - .execute(); - Document picdoc = resp.parse(); - String dlurl = picdoc.getElementsByAttributeValue("itemprop","contentURL").first().attr("src"); - addURLToDownload(new URL(dlurl)); + + Elements thumbnails = doc.select("#galleryImages .inner-block img"); + for (Element thumb : thumbnails) { + String thumbUrl = thumb.attr("src"); + String picUrl = thumbUrl.replace("thumbs/", ""); + addURLToDownload(new URL(picUrl)); } + waitForThreads(); } - + @Override public String getHost() { return HOST; @@ -86,9 +75,8 @@ public class NewsfilterRipper extends AlbumRipper { if (m.matches()) { return m.group(2); } - throw new MalformedURLException("Expected newsfilter gallery format: " - + "http://newsfilter.org/gallery/galleryid" - + " Got: " + url); + throw new MalformedURLException( + "Expected newsfilter gallery format: http://newsfilter.org/gallery/galleryid" + + " Got: " + url); } - } From c4fe08d54f9e37b937c5f024da19133d983447c6 Mon Sep 17 00:00:00 2001 From: metaprime Date: Wed, 5 Apr 2017 20:48:25 -0700 Subject: [PATCH 23/26] Update README for new request issue. --- README.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 24eb60fe..987a4646 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ For information about running the `.jar` file, see [the How To Run wiki](https:/ * Quickly downloads all images in an online album (see supported sites below) * Easily re-rip albums to fetch new content -## Supported sites: +## [List of Supported Sites](https://github.com/4pr0n/ripme/wiki/Supported-Sites) * imgur * twitter @@ -44,11 +44,9 @@ For information about running the `.jar` file, see [the How To Run wiki](https:/ * xhamster * (more) -### [Full updated list](https://github.com/4pr0n/ripme/issues/8) - ## Not Supported? -Request support for more sites by adding a comment to [this Github issue](https://github.com/4pr0n/ripme/issues/8). +Request support for more sites by adding a comment to [this Github issue](https://github.com/4pr0n/ripme/issues/502). If you're a developer, you can add your own by following the wiki guide [How To Create A Ripper for HTML Websites](https://github.com/4pr0n/ripme/wiki/How-To-Create-A-Ripper-for-HTML-websites). From 036cff3a3076deb2a1b68b4a15628aa84f1ecc61 Mon Sep 17 00:00:00 2001 From: metaprime Date: Tue, 25 Apr 2017 03:20:54 -0700 Subject: [PATCH 24/26] Removed MinusRipper (dead website) --- .../ripme/ripper/rippers/MinusRipper.java | 187 ------------------ 1 file changed, 187 deletions(-) delete mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/MinusRipper.java diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MinusRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MinusRipper.java deleted file mode 100644 index f51dc584..00000000 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/MinusRipper.java +++ /dev/null @@ -1,187 +0,0 @@ -package com.rarchives.ripme.ripper.rippers; - -import java.io.IOException; -import java.net.MalformedURLException; -import java.net.URL; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.json.JSONArray; -import org.json.JSONObject; -import org.jsoup.nodes.Document; -import org.jsoup.select.Elements; - -import com.rarchives.ripme.ripper.AlbumRipper; -import com.rarchives.ripme.utils.Http; -import com.rarchives.ripme.utils.Utils; - -public class MinusRipper extends AlbumRipper { - - private static final String DOMAIN = "minus.com", - HOST = "minus"; - - private Document albumDoc = null; - - private static enum ALBUM_TYPE { - GUEST, - ACCOUNT_ALBUM, - ACCOUNT - } - private ALBUM_TYPE albumType; - - public MinusRipper(URL url) throws IOException { - super(url); - } - - @Override - public String getHost() { - return HOST; - } - - public URL sanitizeURL(URL url) throws MalformedURLException { - getGID(url); - return url; - } - - public String getAlbumTitle(URL url) throws MalformedURLException { - try { - // Attempt to use album title as GID - if (albumDoc == null) { - albumDoc = Http.url(url).get(); - } - Elements titles = albumDoc.select("meta[property=og:title]"); - if (titles.size() > 0) { - return HOST + "_" + titles.get(0).attr("content"); - } - } catch (IOException e) { - // Fall back to default album naming convention - } - return super.getAlbumTitle(url); - } - - @Override - public String getGID(URL url) throws MalformedURLException { - // http://vampyr3.minus.com/ - // http://vampyr3.minus.com/uploads - // http://minus.com/mw7ztQ6xzP7ae - // http://vampyr3.minus.com/mw7ztQ6xzP7ae - String u = url.toExternalForm(); - u = u.replace("/www.minus.com", "/minus.com"); - u = u.replace("/i.minus.com", "/minus.com"); - Pattern p; Matcher m; - - p = Pattern.compile("^https?://minus\\.com/m([a-zA-Z0-9]+).*$"); - m = p.matcher(u); - if (m.matches()) { - albumType = ALBUM_TYPE.GUEST; - return "guest_" + m.group(1); - } - - p = Pattern.compile("^https?://([a-zA-Z0-9\\-_]+)\\.minus\\.com/m([a-zA-Z0-9]+).*$"); - m = p.matcher(u); - if (m.matches()) { - albumType = ALBUM_TYPE.ACCOUNT_ALBUM; - return m.group(1) + "_" + m.group(2); - } - - p = Pattern.compile("^https?://([a-zA-Z0-9]+)\\.minus\\.com/?(uploads)?$"); - m = p.matcher(u); - if (m.matches()) { - albumType = ALBUM_TYPE.ACCOUNT; - return m.group(1); - } - - throw new MalformedURLException( - "Expected minus.com album URL formats: " - + "username.minus.com or " - + "username.minus.com/m... or " - + "minus.com/m..." - + " Got: " + url); - } - - @Override - public void rip() throws IOException { - switch (albumType) { - case ACCOUNT: - ripAccount(this.url); - break; - case ACCOUNT_ALBUM: - ripAlbum(this.url); - break; - case GUEST: - ripAlbum(this.url); - break; - } - waitForThreads(); - } - - private void ripAccount(URL url) throws IOException { - Pattern p = Pattern.compile("^https?://([a-zA-Z0-9\\-_]+)\\.minus\\.com.*$"); - Matcher m = p.matcher(url.toExternalForm()); - if (!m.matches()) { - throw new IOException("Could not find username from URL " + url); - } - String user = m.group(1); - int page = 1; - while (true) { - String jsonUrl = "http://" + user - + ".minus.com/api/pane/user/" - + user + "/shares.json/" - + page; - logger.info(" Retrieving " + jsonUrl); - JSONObject json = Http.url(jsonUrl).getJSON(); - JSONArray galleries = json.getJSONArray("galleries"); - for (int i = 0; i < galleries.length(); i++) { - JSONObject gallery = galleries.getJSONObject(i); - String title = gallery.getString("name"); - String albumUrl = "http://" + user + ".minus.com/m" + gallery.getString("reader_id"); - ripAlbum(new URL(albumUrl), Utils.filesystemSafe(title)); - - if (isThisATest()) { - break; - } - } - if (page >= json.getInt("total_pages") || isThisATest()) { - break; - } - page++; - } - } - - private void ripAlbum(URL url) throws IOException { - ripAlbum(url, ""); - } - private void ripAlbum(URL url, String subdir) throws IOException { - logger.info(" Retrieving " + url.toExternalForm()); - if (albumDoc == null || !subdir.equals("")) { - albumDoc = Http.url(url).get(); - } - Pattern p = Pattern.compile("^.*var gallerydata = (\\{.*\\});.*$", Pattern.DOTALL); - Matcher m = p.matcher(albumDoc.data()); - if (m.matches()) { - JSONObject json = new JSONObject(m.group(1)); - JSONArray items = json.getJSONArray("items"); - for (int i = 0; i < items.length(); i++) { - JSONObject item = items.getJSONObject(i); - String extension = item.getString("name"); - extension = extension.substring(extension.lastIndexOf('.')); - String image = "http://i.minus.com/i" - + item.getString("id") - + extension; - String prefix = ""; - if (Utils.getConfigBoolean("download.save_order", true)) { - prefix = String.format("%03d_", i + 1); - } - addURLToDownload(new URL(image), prefix, subdir); - if (isThisATest()) { - break; - } - } - } - } - - public boolean canRip(URL url) { - return url.getHost().endsWith(DOMAIN); - } - -} \ No newline at end of file From 2ce3b34befc45ad464a7b5a7b58a56cdae768774 Mon Sep 17 00:00:00 2001 From: metaprime Date: Tue, 25 Apr 2017 03:21:24 -0700 Subject: [PATCH 25/26] Ignore .vscode/ directory --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 63b6da1b..a4cd2929 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,4 @@ history.json .classpath *.txt bin/ +.vscode/ From 2e5c4deb5e0b14f99e81d7d8be52562e7998a334 Mon Sep 17 00:00:00 2001 From: metaprime Date: Tue, 25 Apr 2017 03:39:21 -0700 Subject: [PATCH 26/26] 1.4.7: Fixed NewsFilter, XHamster; added TheChiveRipper --- pom.xml | 2 +- ripme.json | 3 ++- src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 51fd66f9..f511813a 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.4.6 + 1.4.7 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index 951ad1a1..ebbc63b9 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,7 @@ { - "latestVersion" : "1.4.6", + "latestVersion" : "1.4.7", "changeList" : [ + "1.4.7: Fixed NewsFilter, XHamster; added TheChiveRipper", "1.4.6: Eroshare: get album names; Imgur: improve grabbing album name.", "1.4.5: SinnerComics: Added work around for naming bug", "1.4.4: Added SinnerComics, MyHentaiComics rippers; improve E621 ripper.", diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index fec1fecd..ad4cc1ae 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.4.6"; + private static final String DEFAULT_VERSION = "1.4.7"; private static final String updateJsonURL = "https://raw.githubusercontent.com/4pr0n/ripme/master/ripme.json"; private static final String mainFileName = "ripme.jar"; private static final String updateFileName = "ripme.jar.update";