Fixed chan ripper to work with 4chan
This commit is contained in:
parent
e5906db588
commit
0f6b97c73b
@ -38,12 +38,11 @@ public class ChanRipper extends AlbumRipper {
|
|||||||
@Override
|
@Override
|
||||||
public boolean canRip(URL url) {
|
public boolean canRip(URL url) {
|
||||||
// TODO Whitelist?
|
// TODO Whitelist?
|
||||||
return url.getHost().contains("chan") && url.toExternalForm().contains("/res/");
|
return url.getHost().contains("chan") &&
|
||||||
|
( url.toExternalForm().contains("/res/") // Most chans
|
||||||
|
|| url.toExternalForm().contains("/thread/")); // 4chan
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Reformat given URL into the desired format (all images on single page)
|
|
||||||
*/
|
|
||||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||||
return url;
|
return url;
|
||||||
}
|
}
|
||||||
@ -52,16 +51,26 @@ public class ChanRipper extends AlbumRipper {
|
|||||||
public String getGID(URL url) throws MalformedURLException {
|
public String getGID(URL url) throws MalformedURLException {
|
||||||
Pattern p; Matcher m;
|
Pattern p; Matcher m;
|
||||||
|
|
||||||
p = Pattern.compile("^.*chan.*\\.[a-z]{2,3}/[a-z]+/res/([0-9]+)(\\.html|\\.php)?.*$");
|
String u = url.toExternalForm();
|
||||||
m = p.matcher(url.toExternalForm());
|
if (u.contains("/res/")) {
|
||||||
|
p = Pattern.compile("^.*chan.*\\.[a-z]{2,3}/[a-zA-Z0-9]+/res/([0-9]+)(\\.html|\\.php)?.*$");
|
||||||
|
m = p.matcher(u);
|
||||||
if (m.matches()) {
|
if (m.matches()) {
|
||||||
return m.group(1);
|
return m.group(1);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
else if (u.contains("/thread/")) {
|
||||||
|
p = Pattern.compile("^.*chan.*\\.[a-z]{2,3}/[a-zA-Z0-9]+/thread/([0-9]+)(\\.html|\\.php)?.*$");
|
||||||
|
m = p.matcher(u);
|
||||||
|
if (m.matches()) {
|
||||||
|
return m.group(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
throw new MalformedURLException(
|
throw new MalformedURLException(
|
||||||
"Expected *chan URL formats: "
|
"Expected *chan URL formats: "
|
||||||
+ "*chan.com/@/res/####.html"
|
+ "*chan.com/@/res/####.html"
|
||||||
+ " Got: " + url);
|
+ " Got: " + u);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -77,7 +86,8 @@ public class ChanRipper extends AlbumRipper {
|
|||||||
if (!link.hasAttr("href")) {
|
if (!link.hasAttr("href")) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (!link.attr("href").contains("/src/")) {
|
if (!link.attr("href").contains("/src/")
|
||||||
|
&& !link.attr("href").contains("4cdn.org")) {
|
||||||
logger.debug("Skipping link that does not contain /src/: " + link.attr("href"));
|
logger.debug("Skipping link that does not contain /src/: " + link.attr("href"));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user