fix regex on reddit ripper & chan ripper

closes #89
This commit is contained in:
4pr0n 2014-07-20 00:45:40 -07:00
parent 8f72815e59
commit bdf8952f79
3 changed files with 9 additions and 4 deletions

View File

@@ -49,7 +49,7 @@ public class ChanRipper extends AbstractHTMLRipper {
String u = url.toExternalForm(); String u = url.toExternalForm();
if (u.contains("/res/")) { if (u.contains("/res/")) {
p = Pattern.compile("^.*(chan|anon-ib).*\\.[a-z]{2,3}/[a-zA-Z0-9]+/res/([0-9]+)(\\.html|\\.php)?.*$"); p = Pattern.compile("^.*(chan|anon-ib).*\\.[a-z]{2,3}/[a-zA-Z0-9/]+/res/([0-9]+)(\\.html|\\.php)?.*$");
m = p.matcher(u); m = p.matcher(u);
if (m.matches()) { if (m.matches()) {
return m.group(2); return m.group(2);

View File

@@ -167,7 +167,11 @@ public class RedditRipper extends AlbumRipper {
Pattern p = RipUtils.getURLRegex(); Pattern p = RipUtils.getURLRegex();
Matcher m = p.matcher(body); Matcher m = p.matcher(body);
while (m.find()) { while (m.find()) {
handleURL(m.group(1), id); String url = m.group(1);
while (url.endsWith(")")) {
url = url.substring(0, url.length() - 1);
}
handleURL(url, id);
} }
} }

View File

@@ -26,6 +26,7 @@ public class RipUtils {
public static List<URL> getFilesFromURL(URL url) { public static List<URL> getFilesFromURL(URL url) {
List<URL> result = new ArrayList<URL>(); List<URL> result = new ArrayList<URL>();
logger.debug("Checking " + url);
// Imgur album // Imgur album
if ((url.getHost().endsWith("imgur.com")) if ((url.getHost().endsWith("imgur.com"))
&& url.toExternalForm().contains("imgur.com/a/")) { && url.toExternalForm().contains("imgur.com/a/")) {
@@ -60,7 +61,7 @@ public class RipUtils {
} }
// Direct link to image // Direct link to image
Pattern p = Pattern.compile("(https?://[a-zA-Z0-9\\-\\.]+\\.[a-zA-Z]{2,3}(/\\S*)\\.(jpg|jpeg|gif|png|mp4))"); Pattern p = Pattern.compile("(https?://[a-zA-Z0-9\\-\\.]+\\.[a-zA-Z]{2,3}(/\\S*)\\.(jpg|jpeg|gif|png|mp4)(\\?.*)?)");
Matcher m = p.matcher(url.toExternalForm()); Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {
try { try {