Merge pull request #181 from metaprime/chan-update

ChanRipper - use subject as title. closes #180
This commit is contained in:
4_pr0n 2015-12-04 09:49:26 -08:00
commit 5e2f887482

View File

@ -11,6 +11,7 @@ import java.util.regex.Pattern;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.rippers.ripperhelpers.ChanSite; import com.rarchives.ripme.ripper.rippers.ripperhelpers.ChanSite;
@ -60,6 +61,20 @@ public class ChanRipper extends AbstractHTMLRipper {
return host + "_" + board; return host + "_" + board;
} }
/**
 * Builds the album title for a thread, preferring the thread's subject line.
 *
 * <p>Loads the first page, reads the text of the first element matching
 * {@code .post.op > .postinfo > .subject}, and returns
 * {@code <host>_<gid>_<subject>}. If no such element exists, its text is
 * blank, or any step throws, the superclass title is returned instead.
 *
 * @param url the URL being ripped
 * @return the subject-based title, or the superclass default when no usable
 *         subject is available
 * @throws MalformedURLException if the superclass fallback rejects {@code url}
 */
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
    try {
        Document doc = getFirstPage();
        // eq(0) yields an empty Elements (whose text() is "") when nothing
        // matches, so a thread without a subject element is handled as a
        // normal case instead of via a NullPointerException from first().text().
        String subject = doc.select(".post.op > .postinfo > .subject").eq(0).text().trim();
        if (!subject.isEmpty()) {
            return getHost() + "_" + getGID(url) + "_" + subject;
        }
        // A blank subject would otherwise produce a title ending in a dangling "_".
        logger.info("No subject found for " + url + "; using default album title");
    } catch (Exception e) {
        // Best-effort lookup: any failure falls back to the default album naming convention.
        logger.warn("Failed to get album title from " + url, e);
    }
    return super.getAlbumTitle(url);
}
@Override @Override
public boolean canRip(URL url) { public boolean canRip(URL url) {
for (ChanSite _chanSite : explicit_domains) { for (ChanSite _chanSite : explicit_domains) {
@ -72,13 +87,14 @@ public class ChanRipper extends AbstractHTMLRipper {
} }
/** /**
* For example the achrives are all known. (Check 4chan-x) * For example the archives are all known. (Check 4chan-x)
* Should be based on the software the specific chan uses. * Should be based on the software the specific chan uses.
* FoolFuuka uses the same (url) layout as 4chan * FoolFuuka uses the same (url) layout as 4chan
* */ * */
@Override @Override
public String getGID(URL url) throws MalformedURLException { public String getGID(URL url) throws MalformedURLException {
Pattern p; Matcher m; Pattern p;
Matcher m;
String u = url.toExternalForm(); String u = url.toExternalForm();
if (u.contains("/thread/") || u.contains("/res/")) { if (u.contains("/thread/") || u.contains("/res/")) {