ImgScroll/src/main/java/com/rarchives/ripme/ripper/rippers/SmuttyRipper.java
4pr0n c5ea044f79 Various fixes to tests:
Ability to set log level, lots of debugging messages
Turn on debug logging during tests, simplified test cases for HTML ripper

Fix fusktator ripper, added test
Fixed gifyo, added test

Added tests for *all* rippers

Adding a few album-guessing URLs
2015-02-12 22:46:10 -08:00

124 lines
3.9 KiB
Java

package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AlbumRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
/**
* Appears to be broken as of 2015-02-11.
* Generating large image from thumbnail requires replacing "/m/" with something else:
* -> Sometimes "/b/"
* -> Sometimes "/p/"
* No way to know without loading the image page.
*/
public class SmuttyRipper extends AlbumRipper {
private static final String DOMAIN = "smutty.com",
HOST = "smutty";
public SmuttyRipper(URL url) throws IOException {
super(url);
}
@Override
public boolean canRip(URL url) {
return (url.getHost().endsWith(DOMAIN));
}
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
return url;
}
@Override
public void rip() throws IOException {
int page = 0;
String url, tag = getGID(this.url);
boolean hasNextPage = true;
while (hasNextPage) {
if (isStopped()) {
break;
}
page++;
url = "http://smutty.com/h/" + tag + "/?q=%23" + tag + "&page=" + page + "&sort=date&lazy=1";
this.sendUpdate(STATUS.LOADING_RESOURCE, url);
logger.info(" Retrieving " + url);
Document doc;
try {
doc = Http.url(url)
.ignoreContentType()
.get();
} catch (IOException e) {
if (e.toString().contains("Status=404")) {
logger.info("No more pages to load");
} else {
logger.warn("Exception while loading " + url, e);
}
break;
}
for (Element image : doc.select("a.l > img")) {
if (isStopped()) {
break;
}
String imageUrl = image.attr("src");
// Construct direct link to image based on thumbnail
StringBuilder sb = new StringBuilder();
String[] fields = imageUrl.split("/");
for (int i = 0; i < fields.length; i++) {
if (i == fields.length - 2 && fields[i].equals("m")) {
fields[i] = "b";
}
sb.append(fields[i]);
if (i < fields.length - 1) {
sb.append("/");
}
}
imageUrl = sb.toString();
addURLToDownload(new URL(imageUrl));
}
if (doc.select("#next").size() == 0) {
break; // No more pages
}
// Wait before loading next page
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
logger.error("[!] Interrupted while waiting to load next album:", e);
break;
}
}
waitForThreads();
}
@Override
public String getHost() {
return HOST;
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://smutty\\.com/h/([a-zA-Z0-9\\-_]+).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
p = Pattern.compile("^https?://[wm.]*smutty\\.com/search/\\?q=([a-zA-Z0-9\\-_%]+).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1).replace("%23", "");
}
throw new MalformedURLException("Expected tag in URL (smutty.com/h/tag and not " + url);
}
}