From 67708a37ffc05782dc4be5b0eafe198f84b994e2 Mon Sep 17 00:00:00 2001 From: Christopher Douglas Date: Sun, 22 Feb 2015 21:18:37 -0600 Subject: [PATCH] ChanRipper - use subject as title. closes #180 --- .../ripme/ripper/rippers/ChanRipper.java | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java index f9ce7c17..0cf51bed 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java @@ -11,6 +11,7 @@ import java.util.regex.Pattern; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.ripper.rippers.ripperhelpers.ChanSite; @@ -60,6 +61,20 @@ public class ChanRipper extends AbstractHTMLRipper { return host + "_" + board; } + @Override + public String getAlbumTitle(URL url) throws MalformedURLException { + try { + // Attempt to use the OP's subject text as part of the album title + Document doc = getFirstPage(); + String subject = doc.select(".post.op > .postinfo > .subject").first().text(); + return getHost() + "_" + getGID(url) + "_" + subject; + } catch (Exception e) { + // Fall back to default album naming convention + logger.warn("Failed to get album title from " + url, e); + } + return super.getAlbumTitle(url); + } + @Override public boolean canRip(URL url) { for (ChanSite _chanSite : explicit_domains) { @@ -72,13 +87,14 @@ public class ChanRipper extends AbstractHTMLRipper { } /** - * For example the achrives are all known. (Check 4chan-x) + * For example the archives are all known. (Check 4chan-x) * Should be based on the software the specific chan uses. 
* FoolFuuka uses the same (url) layout as 4chan * */ @Override public String getGID(URL url) throws MalformedURLException { - Pattern p; Matcher m; + Pattern p; + Matcher m; String u = url.toExternalForm(); if (u.contains("/thread/") || u.contains("/res/")) {