ChanRipper - use subject as title. closes #180

This commit is contained in:
Christopher Douglas 2015-02-22 21:18:37 -06:00
parent 6d040aa0d1
commit 67708a37ff

View File

@ -11,6 +11,7 @@ import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.rippers.ripperhelpers.ChanSite;
@ -60,6 +61,20 @@ public class ChanRipper extends AbstractHTMLRipper {
return host + "_" + board;
}
/**
 * Builds the album title for a thread, appending the thread's subject when one is present.
 *
 * <p>The subject is read from the opening post of the document returned by
 * {@code getFirstPage()}, using the selector {@code .post.op > .postinfo > .subject}.
 * If the page has no such element, the subject is blank, or anything goes wrong while
 * obtaining it, the title produced by {@code super.getAlbumTitle(url)} is used instead.
 *
 * @param url the thread URL being ripped
 * @return {@code <host>_<gid>_<subject>} when a non-blank subject is found; otherwise the
 *         superclass's album title
 * @throws MalformedURLException if raised by the superclass fallback
 */
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
    try {
        Document doc = getFirstPage();
        // Elements.first() yields null when the selector matches nothing, so check
        // explicitly instead of relying on a NullPointerException to trigger the fallback.
        Element subjectElement = doc.select(".post.op > .postinfo > .subject").first();
        if (subjectElement != null) {
            String subject = subjectElement.text().trim();
            // A blank subject would leave a dangling "_" at the end of the title.
            if (!subject.isEmpty()) {
                return getHost() + "_" + getGID(url) + "_" + subject;
            }
        }
        logger.warn("No thread subject found at " + url + "; using default album title");
    } catch (Exception e) {
        // Best effort only: any failure here falls back to the default naming convention.
        logger.warn("Failed to get album title from " + url, e);
    }
    return super.getAlbumTitle(url);
}
@Override
public boolean canRip(URL url) {
for (ChanSite _chanSite : explicit_domains) {
@ -72,13 +87,14 @@ public class ChanRipper extends AbstractHTMLRipper {
}
/**
* For example the achrives are all known. (Check 4chan-x)
* For example the archives are all known. (Check 4chan-x)
* Should be based on the software the specific chan uses.
* FoolFuuka uses the same (url) layout as 4chan
* */
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p; Matcher m;
Pattern p;
Matcher m;
String u = url.toExternalForm();
if (u.contains("/thread/") || u.contains("/res/")) {