From c5eb83a33863de31946e8bc239928be37adf1720 Mon Sep 17 00:00:00 2001
From: 4pr0n
Date: Wed, 4 Jun 2014 20:23:21 -0700
Subject: [PATCH] 1.0.53 - 8Muses ripper can rip subalbums/comics

Should close #52 since it also grabs various image types that aren't
consistent across all albums
---
 pom.xml                                       |   2 +-
 .../ripper/rippers/EightmusesRipper.java      | 191 +++++++++++++++---
 .../com/rarchives/ripme/ui/UpdateUtils.java   |   2 +-
 3 files changed, 168 insertions(+), 27 deletions(-)

diff --git a/pom.xml b/pom.xml
index e7698687..b56ac15b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -4,7 +4,7 @@
   <groupId>com.rarchives.ripme</groupId>
   <artifactId>ripme</artifactId>
   <packaging>jar</packaging>
-  <version>1.0.52</version>
+  <version>1.0.53</version>
   <name>ripme</name>
   <url>http://rip.rarchives.com</url>
   <properties>
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java
index ac800d3a..876d8059 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java
@@ -1,5 +1,6 @@
 package com.rarchives.ripme.ripper.rippers;
 
+import java.io.File;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
@@ -7,12 +8,14 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.apache.log4j.Logger;
+import org.jsoup.Connection.Method;
 import org.jsoup.Connection.Response;
 import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 
 import com.rarchives.ripme.ripper.AlbumRipper;
+import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
 import com.rarchives.ripme.utils.Utils;
 
 public class EightmusesRipper extends AlbumRipper {
@@ -21,6 +24,8 @@ public class EightmusesRipper extends AlbumRipper {
                                 HOST = "8muses";
     private static final Logger logger = Logger.getLogger(EightmusesRipper.class);
 
+    private Document albumDoc = null;
+
     public EightmusesRipper(URL url) throws IOException {
         super(url);
     }
@@ -34,36 +39,172 @@ public class EightmusesRipper extends AlbumRipper {
     public URL sanitizeURL(URL url) throws MalformedURLException {
         return url;
     }
+
+    /**
+     * Names the album after the text following the last '/' in the page's
+     * description meta tag, prefixed with the host name. Falls back to the
+     * default naming when the page cannot be fetched or has no such tag.
+     */
+    @Override
+    public String getAlbumTitle(URL url) throws MalformedURLException {
+        try {
+            // Attempt to use album title as GID
+            if (albumDoc == null) {
+                albumDoc = Jsoup.connect(url.toExternalForm())
+                                .userAgent(USER_AGENT)
+                                .method(Method.GET)
+                                .timeout(Utils.getConfigInteger("download.timeout", 5000))
+                                .execute()
+                                .parse();
+            }
+            Element titleElement = albumDoc.select("meta[name=description]").first();
+            if (titleElement != null) {
+                String title = titleElement.attr("content");
+                title = title.substring(title.lastIndexOf('/') + 1);
+                return HOST + "_" + title.trim();
+            }
+            // first() returns null when nothing matches; use the default name
+            logger.info("Unable to find title at " + url);
+        } catch (IOException e) {
+            // Fall back to default album naming convention
+            logger.info("Unable to find title at " + url);
+        }
+        return super.getAlbumTitle(url);
+    }
 
     @Override
     public void rip() throws IOException {
-        logger.info(" Retrieving " + this.url);
-        Response resp = Jsoup.connect(this.url.toExternalForm())
-                             .userAgent(USER_AGENT)
-                             .execute();
-        Document doc = resp.parse();
-        int index = 0;
-        for (Element thumb : doc.select("img")) {
-            if (!thumb.hasAttr("data-cfsrc")) {
-                continue;
-            }
-            String image = thumb.attr("data-cfsrc");
-            if (image.contains("-cu_")) {
-                image = image.replaceAll("-cu_[^.]+", "-me");
-            }
-            if (image.startsWith("//")) {
-                image = "http:" + image;
-            }
-            index += 1;
-            URL imageURL = new URL(image);
-            String prefix = "";
-            if (Utils.getConfigBoolean("download.save_order", true)) {
-                prefix = String.format("%03d_", index);
-            }
-            addURLToDownload(imageURL, prefix);
-        }
+        ripAlbum(this.url.toExternalForm(), this.workingDir);
         waitForThreads();
     }
+
+    /**
+     * Rips one album page: recurses into subalbums when the page lists them,
+     * otherwise queues every image on the page for download.
+     *
+     * @param url    Address of the album page
+     * @param subdir Directory that receives this album's files
+     * @throws IOException If a page cannot be retrieved or a subalbum link is malformed
+     */
+    private void ripAlbum(String url, File subdir) throws IOException {
+        logger.info(" Retrieving " + url);
+        sendUpdate(STATUS.LOADING_RESOURCE, url);
+        if (albumDoc == null) {
+            Response resp = Jsoup.connect(url)
+                                 .userAgent(USER_AGENT)
+                                 .timeout(Utils.getConfigInteger("download.timeout", 5000))
+                                 .execute();
+            albumDoc = resp.parse();
+        }
+
+        int index = 0; // Both album index and image index
+        if (albumDoc.select(".preview > span").size() > 0) {
+            // Page contains subalbums (not images)
+            for (Element subalbum : albumDoc.select("a.preview")) {
+                ripSubalbumFromPreview(subalbum, subdir, ++index);
+            }
+        }
+        else {
+            // Page contains images
+            for (Element thumb : albumDoc.select("img")) {
+                downloadImage(thumb, subdir, ++index);
+            }
+        }
+    }
+
+    /**
+     * Rips the subalbum that a preview anchor links to, saving it into its
+     * own directory beneath the parent album's directory.
+     *
+     * @param subalbum Anchor element of a subalbum
+     * @param subdir   Directory of the parent album
+     * @param index    Position of the subalbum on the parent page (currently unused)
+     * @throws IOException If the subalbum page cannot be retrieved or its link is malformed
+     */
+    private void ripSubalbumFromPreview(Element subalbum, File subdir, int index) throws IOException {
+        // Find + sanitize URL from Element
+        String subUrl = subalbum.attr("href");
+        subUrl = subUrl.replaceAll("\\.\\./", "");
+        if (subUrl.startsWith("//")) {
+            // Protocol-relative link: keep the host and path, only add the scheme
+            subUrl = "http:" + subUrl;
+        }
+        else if (!subUrl.startsWith("http://")) {
+            subUrl = "http://www.8muses.com/" + subUrl;
+        }
+        // Get album title
+        String subTitle = subalbum.attr("alt");
+        if (subTitle.equals("")) {
+            subTitle = getGID(new URL(subUrl));
+        }
+        subTitle = Utils.filesystemSafe(subTitle);
+        // Create path to subdirectory
+        File subDir = new File(subdir.getAbsolutePath() + File.separator + subTitle);
+        if (!subDir.exists()) {
+            subDir.mkdirs();
+        }
+        // Drop the cached page so ripAlbum fetches the subalbum instead
+        albumDoc = null;
+        ripAlbum(subUrl, subDir);
+        try {
+            Thread.sleep(2000);
+        } catch (InterruptedException e) {
+            logger.warn("Interrupted whiel waiting to load next album");
+            // Restore the flag so callers can see the interruption
+            Thread.currentThread().interrupt();
+        }
+    }
+
+    /**
+     * Queues one image for download, rewriting a thumbnail URL to its full-size counterpart.
+     *
+     * @param thumb  Image element; its data-cfsrc attribute is preferred over src
+     * @param subdir Directory to save the image into
+     * @param index  Position of the image on the page, used as a filename prefix
+     *               when download.save_order is enabled
+     */
+    private void downloadImage(Element thumb, File subdir, int index) {
+        // Find thumbnail image source
+        String image = null;
+        if (thumb.hasAttr("data-cfsrc")) {
+            image = thumb.attr("data-cfsrc");
+        }
+        else if (thumb.hasAttr("src")) {
+            image = thumb.attr("src");
+        }
+        else {
+            logger.warn("Thumb does not havedata-cfsrc or src: " + thumb);
+            return;
+        }
+        // Remove relative directory path naming
+        image = image.replaceAll("\\.\\./", "");
+        if (image.startsWith("//")) {
+            image = "http:" + image;
+        }
+        // Convert from thumb URL to full-size
+        if (image.contains("-cu_")) {
+            image = image.replaceAll("-cu_[^.]+", "-me");
+        }
+        // Set download path
+        try {
+            URL imageURL = new URL(image);
+            String saveAs = subdir.getAbsolutePath() + File.separator;
+            if (Utils.getConfigBoolean("download.save_order", true)) {
+                // Append image index
+                saveAs += String.format("%03d_", index);
+            }
+            // Append image title
+            saveAs += Utils.filesystemSafe(thumb.attr("title"));
+            // Append extension
+            saveAs += image.substring(image.lastIndexOf('.'));
+            File saveFile = new File(saveAs);
+            // Download
+            addURLToDownload(imageURL, saveFile, thumb.baseUri(), null);
+        } catch (IOException e) {
+            logger.error("Failed to download image at " + image, e);
+            sendUpdate(STATUS.DOWNLOAD_ERRORED, "Failed to download image at " + image);
+        }
+    }
 
     @Override
     public String getHost() {
diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
index d5c7ad24..afa773e6 100644
--- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
+++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
@@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils;
 public class UpdateUtils {
 
     private static final Logger logger = Logger.getLogger(UpdateUtils.class);
-    private static final String DEFAULT_VERSION = "1.0.52";
+    private static final String DEFAULT_VERSION = "1.0.53";
     private static final String updateJsonURL = "http://rarchives.com/ripme.json";
     private static final String updateJarURL = "http://rarchives.com/ripme.jar";
     private static final String mainFileName = "ripme.jar";