Store Cheeby albums in separate folders

Single-image posts go in root directory.
Also maintain order of albums.

For #146
This commit is contained in:
4pr0n 2015-01-11 15:07:38 -08:00
parent c3b82fff32
commit 7fe17a8f00

View File

@ -4,7 +4,9 @@ import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -12,11 +14,13 @@ import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Http;
public class CheebyRipper extends AbstractHTMLRipper { public class CheebyRipper extends AbstractHTMLRipper {
private int offset = 0; private int offset = 0;
private Map<String, Integer> albumSets = new HashMap<String, Integer>();
public CheebyRipper(URL url) throws IOException { public CheebyRipper(URL url) throws IOException {
super(url); super(url);
@ -65,19 +69,99 @@ public class CheebyRipper extends AbstractHTMLRipper {
return nextDoc; return nextDoc;
} }
@Override
public void downloadURL(URL url, int index) {
// Not implmeneted here
}
@Override @Override
public List<String> getURLsFromPage(Document page) { public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<String>(); // Not implemented here
return null;
}
public List<Image> getImagesFromPage(Document page) {
List<Image> imageURLs = new ArrayList<Image>();
for (Element image : page.select("div.i a img")) { for (Element image : page.select("div.i a img")) {
// Get image URL
String imageURL = image.attr("src"); String imageURL = image.attr("src");
imageURL = imageURL.replace("s.", "."); imageURL = imageURL.replace("s.", ".");
imageURLs.add(imageURL);
// Get "album" from image link
String href = image.parent().attr("href");
while (href.endsWith("/")) {
href = href.substring(0, href.length() - 2);
}
String[] hrefs = href.split("/");
String prefix = hrefs[hrefs.length - 1];
// Keep track of how many images are in this album
int albumSetCount = 0;
if (albumSets.containsKey(prefix)) {
albumSetCount = albumSets.get(prefix);
}
albumSetCount++;
albumSets.put(prefix, albumSetCount);
imageURLs.add(new Image(imageURL, prefix, albumSetCount));
} }
return imageURLs; return imageURLs;
} }
@Override @Override
public void downloadURL(URL url, int index) { public void rip() throws IOException {
addURLToDownload(url, getPrefix(index)); logger.info("Retrieving " + this.url);
sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
Document doc = getFirstPage();
while (doc != null) {
List<Image> images = getImagesFromPage(doc);
if (images.size() == 0) {
throw new IOException("No images found at " + doc.location());
}
for (Image image : images) {
if (isStopped()) {
break;
}
// Don't create subdirectory if "album" only has 1 image
if (albumSets.get(image.prefix) > 1) {
addURLToDownload(new URL(image.url), getPrefix(image.index), image.prefix);
}
else {
addURLToDownload(new URL(image.url));
}
}
if (isStopped()) {
break;
}
try {
sendUpdate(STATUS.LOADING_RESOURCE, "next page");
doc = getNextPage(doc);
} catch (IOException e) {
logger.info("Can't get next page: " + e.getMessage());
break;
}
}
// If they're using a thread pool, wait for it.
if (getThreadPool() != null) {
getThreadPool().waitForThreads();
}
waitForThreads();
}
private class Image {
String url, prefix;
int index;
public Image(String url, String prefix, int index) {
this.url = url;
this.prefix = prefix;
this.index = index;
}
} }
} }