From 7fe17a8f000b6c30d3ca05188ff5421497c3e253 Mon Sep 17 00:00:00 2001
From: 4pr0n
Date: Sun, 11 Jan 2015 15:07:38 -0800
Subject: [PATCH] Store Cheeby albums in separate folders

Single-image posts go in root directory.
Also maintain order of albums.

For #146
---
 .../ripme/ripper/rippers/CheebyRipper.java    | 109 ++++++++++++++++++-
 1 file changed, 105 insertions(+), 4 deletions(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/CheebyRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/CheebyRipper.java
index efe5920a..182c46b3 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/CheebyRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/CheebyRipper.java
@@ -4,7 +4,9 @@ import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -12,11 +14,13 @@ import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 
 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
 import com.rarchives.ripme.utils.Http;
 
 public class CheebyRipper extends AbstractHTMLRipper {
 
     private int offset = 0;
+    private Map<String, Integer> albumSets = new HashMap<String, Integer>();
 
     public CheebyRipper(URL url) throws IOException {
         super(url);
@@ -65,19 +69,116 @@ public class CheebyRipper extends AbstractHTMLRipper {
         return nextDoc;
     }
 
+    @Override
+    public void downloadURL(URL url, int index) {
+        // Not implemented here
+    }
+
     @Override
     public List<String> getURLsFromPage(Document page) {
-        List<String> imageURLs = new ArrayList<String>();
+        // Not implemented here
+        return null;
+    }
+
+    /**
+     * Builds one {@link Image} per element matched by {@code div.i a img}.
+     * The album name is the last path segment of the parent element's href
+     * (trailing slashes removed); the index is the running count of images
+     * seen so far for that album, which is also recorded in albumSets.
+     *
+     * @param page Page to scan for images.
+     * @return Images found on the page, in the order they were matched.
+     */
+    public List<Image> getImagesFromPage(Document page) {
+        List<Image> imageURLs = new ArrayList<Image>();
         for (Element image : page.select("div.i a img")) {
+            // Get image URL
             String imageURL = image.attr("src");
             imageURL = imageURL.replace("s.", ".");
-            imageURLs.add(imageURL);
+
+            // Get "album" from image link
+            String href = image.parent().attr("href");
+            while (href.endsWith("/")) {
+                href = href.substring(0, href.length() - 1);
+            }
+            String[] hrefs = href.split("/");
+            String prefix = hrefs[hrefs.length - 1];
+
+            // Keep track of how many images are in this album
+            int albumSetCount = 0;
+            if (albumSets.containsKey(prefix)) {
+                albumSetCount = albumSets.get(prefix);
+            }
+            albumSetCount++;
+            albumSets.put(prefix, albumSetCount);
+
+            imageURLs.add(new Image(imageURL, prefix, albumSetCount));
+
         }
         return imageURLs;
     }
 
+    /**
+     * Queues the images of each page for download until no next page can be
+     * loaded or the rip is stopped. An image whose album count exceeds 1 goes
+     * into a subdirectory named after the album; others go into the root.
+     *
+     * @throws IOException If a page has no images or the first page fails to load.
+     */
     @Override
-    public void downloadURL(URL url, int index) {
-        addURLToDownload(url, getPrefix(index));
+    public void rip() throws IOException {
+        logger.info("Retrieving " + this.url);
+        sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
+        Document doc = getFirstPage();
+
+        while (doc != null) {
+            List<Image> images = getImagesFromPage(doc);
+
+            if (images.size() == 0) {
+                throw new IOException("No images found at " + doc.location());
+            }
+
+            for (Image image : images) {
+                if (isStopped()) {
+                    break;
+                }
+                // Don't create subdirectory if "album" only has 1 image
+                if (albumSets.get(image.prefix) > 1) {
+                    addURLToDownload(new URL(image.url), getPrefix(image.index), image.prefix);
+                }
+                else {
+                    addURLToDownload(new URL(image.url));
+                }
+            }
+
+            if (isStopped()) {
+                break;
+            }
+
+            try {
+                sendUpdate(STATUS.LOADING_RESOURCE, "next page");
+                doc = getNextPage(doc);
+            } catch (IOException e) {
+                logger.info("Can't get next page: " + e.getMessage());
+                break;
+            }
+        }
+
+        // If they're using a thread pool, wait for it.
+        if (getThreadPool() != null) {
+            getThreadPool().waitForThreads();
+        }
+        waitForThreads();
+    }
+
+    /** An image URL plus the album it belongs to and its position within that album. */
+    private static class Image {
+        String url, prefix;
+        int index;
+        public Image(String url, String prefix, int index) {
+            this.url = url;
+            this.prefix = prefix;
+            this.index = index;
+        }
     }
 }