Merge pull request #181 from metaprime/chan-update

ChanRipper - use subject as title. closes #180
This commit is contained in:
4_pr0n 2015-12-04 09:49:26 -08:00
commit 5e2f887482

View File

@ -11,6 +11,7 @@ import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.rippers.ripperhelpers.ChanSite;
@ -60,6 +61,20 @@ public class ChanRipper extends AbstractHTMLRipper {
return host + "_" + board;
}
/**
 * Builds the album title for a thread, preferring the thread's subject.
 *
 * The title has the form {@code host_gid_subject}, where the subject is the
 * text of the first element matching {@code .post.op > .postinfo > .subject}
 * in the document returned by {@link #getFirstPage()}. When no such element
 * exists, the subject is blank, or anything goes wrong while obtaining it,
 * the title from {@code super.getAlbumTitle(url)} is returned instead.
 *
 * @param url the URL of the thread being ripped
 * @return the subject-based album title, or the superclass's title
 * @throws MalformedURLException if the fallback title cannot be derived from {@code url}
 */
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
    try {
        // Attempt to use the thread subject as part of the album title
        Document doc = getFirstPage();
        String subjectSelector = ".post.op > .postinfo > .subject";
        // first() yields null when nothing matches, so test for a match
        // explicitly instead of relying on a NullPointerException.
        if (!doc.select(subjectSelector).isEmpty()) {
            String subject = doc.select(subjectSelector).first().text();
            // A blank subject would leave a dangling "_" at the end of the title.
            if (!subject.trim().isEmpty()) {
                return getHost() + "_" + getGID(url) + "_" + subject;
            }
        }
        logger.warn("No subject found for " + url + ", using default album title");
    } catch (Exception e) {
        // Best effort only: any failure falls back to the default naming convention
        logger.warn("Failed to get album title from " + url, e);
    }
    return super.getAlbumTitle(url);
}
@Override
public boolean canRip(URL url) {
for (ChanSite _chanSite : explicit_domains) {
@ -72,13 +87,14 @@ public class ChanRipper extends AbstractHTMLRipper {
}
/**
* For example the achrives are all known. (Check 4chan-x)
* For example the archives are all known. (Check 4chan-x)
* Should be based on the software the specific chan uses.
* FoolFuuka uses the same (url) layout as 4chan
* */
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p; Matcher m;
Pattern p;
Matcher m;
String u = url.toExternalForm();
if (u.contains("/thread/") || u.contains("/res/")) {