fix regex on reddit ripper & chan ripper

closes #89
This commit is contained in:
4pr0n 2014-07-20 00:45:40 -07:00
parent 8f72815e59
commit bdf8952f79
3 changed files with 9 additions and 4 deletions

View File

@@ -49,7 +49,7 @@ public class ChanRipper extends AbstractHTMLRipper {
String u = url.toExternalForm();
if (u.contains("/res/")) {
p = Pattern.compile("^.*(chan|anon-ib).*\\.[a-z]{2,3}/[a-zA-Z0-9]+/res/([0-9]+)(\\.html|\\.php)?.*$");
p = Pattern.compile("^.*(chan|anon-ib).*\\.[a-z]{2,3}/[a-zA-Z0-9/]+/res/([0-9]+)(\\.html|\\.php)?.*$");
m = p.matcher(u);
if (m.matches()) {
return m.group(2);

View File

@@ -167,7 +167,11 @@ public class RedditRipper extends AlbumRipper {
Pattern p = RipUtils.getURLRegex();
Matcher m = p.matcher(body);
while (m.find()) {
handleURL(m.group(1), id);
String url = m.group(1);
while (url.endsWith(")")) {
url = url.substring(0, url.length() - 1);
}
handleURL(url, id);
}
}

View File

@@ -26,6 +26,7 @@ public class RipUtils {
public static List<URL> getFilesFromURL(URL url) {
List<URL> result = new ArrayList<URL>();
logger.debug("Checking " + url);
// Imgur album
if ((url.getHost().endsWith("imgur.com"))
&& url.toExternalForm().contains("imgur.com/a/")) {
@@ -60,7 +61,7 @@ public class RipUtils {
}
// Direct link to image
Pattern p = Pattern.compile("(https?://[a-zA-Z0-9\\-\\.]+\\.[a-zA-Z]{2,3}(/\\S*)\\.(jpg|jpeg|gif|png|mp4))");
Pattern p = Pattern.compile("(https?://[a-zA-Z0-9\\-\\.]+\\.[a-zA-Z]{2,3}(/\\S*)\\.(jpg|jpeg|gif|png|mp4)(\\?.*)?)");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
try {
@@ -72,7 +73,7 @@ public class RipUtils {
}
}
if(url.getHost().equals("imgur.com") ||
if (url.getHost().equals("imgur.com") ||
url.getHost().equals("m.imgur.com")){
try {
// Fetch the page