From bdf8952f793968377fc81627b0fa91594b23df9f Mon Sep 17 00:00:00 2001 From: 4pr0n Date: Sun, 20 Jul 2014 00:45:40 -0700 Subject: [PATCH] fix regex on reddit ripper & chan ripper closes #89 --- .../java/com/rarchives/ripme/ripper/rippers/ChanRipper.java | 2 +- .../com/rarchives/ripme/ripper/rippers/RedditRipper.java | 6 +++++- src/main/java/com/rarchives/ripme/utils/RipUtils.java | 5 +++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java index 142e9973..6f16d8c4 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java @@ -49,7 +49,7 @@ public class ChanRipper extends AbstractHTMLRipper { String u = url.toExternalForm(); if (u.contains("/res/")) { - p = Pattern.compile("^.*(chan|anon-ib).*\\.[a-z]{2,3}/[a-zA-Z0-9]+/res/([0-9]+)(\\.html|\\.php)?.*$"); + p = Pattern.compile("^.*(chan|anon-ib).*\\.[a-z]{2,3}/[a-zA-Z0-9/]+/res/([0-9]+)(\\.html|\\.php)?.*$"); m = p.matcher(u); if (m.matches()) { return m.group(2); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java index 4cad6885..7b5c82d1 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java @@ -167,7 +167,11 @@ public class RedditRipper extends AlbumRipper { Pattern p = RipUtils.getURLRegex(); Matcher m = p.matcher(body); while (m.find()) { - handleURL(m.group(1), id); + String url = m.group(1); + while (url.endsWith(")")) { + url = url.substring(0, url.length() - 1); + } + handleURL(url, id); } } diff --git a/src/main/java/com/rarchives/ripme/utils/RipUtils.java b/src/main/java/com/rarchives/ripme/utils/RipUtils.java index 57bcd9fa..062404bf 100644 --- a/src/main/java/com/rarchives/ripme/utils/RipUtils.java +++ b/src/main/java/com/rarchives/ripme/utils/RipUtils.java @@ -26,6 +26,7 @@ public class RipUtils { public static List getFilesFromURL(URL url) { List result = new ArrayList(); + logger.debug("Checking " + url); // Imgur album if ((url.getHost().endsWith("imgur.com")) && url.toExternalForm().contains("imgur.com/a/")) { @@ -60,7 +61,7 @@ public class RipUtils { } // Direct link to image - Pattern p = Pattern.compile("(https?://[a-zA-Z0-9\\-\\.]+\\.[a-zA-Z]{2,3}(/\\S*)\\.(jpg|jpeg|gif|png|mp4))"); + Pattern p = Pattern.compile("(https?://[a-zA-Z0-9\\-\\.]+\\.[a-zA-Z]{2,3}(/\\S*)\\.(jpg|jpeg|gif|png|mp4)(\\?.*)?)"); Matcher m = p.matcher(url.toExternalForm()); if (m.matches()) { try { @@ -72,7 +73,7 @@ public class RipUtils { } } - if(url.getHost().equals("imgur.com") || + if (url.getHost().equals("imgur.com") || url.getHost().equals("m.imgur.com")){ try { // Fetch the page