From 14afa9695ceee43cb255055b1bd7f935a7833dcb Mon Sep 17 00:00:00 2001
From: 4pr0n
Date: Wed, 12 Mar 2014 19:29:31 -0700
Subject: [PATCH] 8Muses support

---
Notes:
    Adds EightmusesRipper: fetches the album page with jsoup, walks every
    <img> carrying a "data-cfsrc" attribute, rewrites "-cu_*" image names to
    "-me", prefixes protocol-relative URLs with "http:", and queues each
    image through addURLToDownload with a zero-padded "%03d_" index prefix.
    The album id (GID) is the path segment after /index/category/.

    Adds EightmusesRipperTest: rips one sample album and checks the working
    directory; the test returns immediately unless DOWNLOAD_CONTENT is set.

 .../ripper/rippers/EightmusesRipper.java      | 79 +++++++++++++++++++
 .../ripper/rippers/EightmusesRipperTest.java  | 33 ++++++++
 2 files changed, 112 insertions(+)
 create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java
 create mode 100644 src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java
new file mode 100644
index 00000000..9d0559bb
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java
@@ -0,0 +1,79 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.log4j.Logger;
+import org.jsoup.Connection.Response;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+import com.rarchives.ripme.ripper.AbstractRipper;
+
+public class EightmusesRipper extends AbstractRipper {
+
+    private static final String DOMAIN = "8muses.com",
+                                HOST = "8muses";
+    private static final Logger logger = Logger.getLogger(EightmusesRipper.class);
+
+    public EightmusesRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public boolean canRip(URL url) {
+        return url.getHost().endsWith(DOMAIN);
+    }
+
+    @Override
+    public URL sanitizeURL(URL url) throws MalformedURLException {
+        return url;
+    }
+
+    @Override
+    public void rip() throws IOException {
+        logger.info(" Retrieving " + this.url);
+        Response resp = Jsoup.connect(this.url.toExternalForm())
+                            .userAgent(USER_AGENT)
+                            .execute();
+        Document doc = resp.parse();
+        int index = 0;
+        for (Element thumb : doc.select("img")) {
+            if (!thumb.hasAttr("data-cfsrc")) {
+                continue;
+            }
+            String image = thumb.attr("data-cfsrc");
+            if (image.contains("-cu_")) {
+                image = image.replaceAll("-cu_[^.]+", "-me");
+            }
+            if (image.startsWith("//")) {
+                image = "http:" + image;
+            }
+            //image = image.replace(" ", "%20");
+            URL imageURL = new URL(image);
+            index += 1;
+            addURLToDownload(imageURL, String.format("%03d_", index));
+        }
+        waitForThreads();
+    }
+
+    @Override
+    public String getHost() {
+        return HOST;
+    }
+
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/index/category/([a-zA-Z0-9\\-_]+).*$");
+        Matcher m = p.matcher(url.toExternalForm());
+        if (!m.matches()) {
+            throw new MalformedURLException("Expected URL format: http://www.8muses.com/index/category/albumname, got: " + url);
+        }
+        return m.group(m.groupCount());
+    }
+
+}
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java
new file mode 100644
index 00000000..d0662f57
--- /dev/null
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EightmusesRipperTest.java
@@ -0,0 +1,33 @@
+package com.rarchives.ripme.tst.ripper.rippers;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+import com.rarchives.ripme.ripper.rippers.EightmusesRipper;
+
+public class EightmusesRipperTest extends RippersTest {
+
+    public void testEightmusesAlbums() throws IOException {
+        if (!DOWNLOAD_CONTENT) {
+            return;
+        }
+        List<URL> contentURLs = new ArrayList<URL>();
+
+        contentURLs.add(new URL("http://www.8muses.com/index/category/jab-hotassneighbor7"));
+
+        for (URL url : contentURLs) {
+            try {
+                EightmusesRipper ripper = new EightmusesRipper(url);
+                ripper.rip();
+                assert(ripper.getWorkingDir().listFiles().length > 1);
+                deleteDir(ripper.getWorkingDir());
+            } catch (Exception e) {
+                e.printStackTrace();
+                fail("Error while ripping URL " + url + ": " + e.getMessage());
+            }
+        }
+    }
+
+}