From 9fd7bf76632a46d9e22ac8ed5c9981984d6662b8 Mon Sep 17 00:00:00 2001
From: 4pr0n
Date: Mon, 21 Dec 2015 14:47:07 -0800
Subject: [PATCH] Fetch imgur series (comma-separated images) in Reddit ripper. Closes #218

---
 .../ripme/ripper/rippers/ImgurRipper.java     | 55 +++++++++++++++++--
 .../com/rarchives/ripme/utils/RipUtils.java   | 13 +++++
 .../tst/ripper/rippers/ImgurRipperTest.java   |  3 +-
 3 files changed, 66 insertions(+), 5 deletions(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java
index 5bca043d..78d9bcd5 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java
@@ -159,6 +159,53 @@ public class ImgurRipper extends AlbumRipper {
         }
     }
 
+    /**
+     * Builds an album from an imgur "series" URL, where the text right after
+     * imgur.com/ is a comma-separated list of image IDs (imgur.com/id1,id2).
+     * Each ID is looked up through the imgur image API and the URL stored under
+     * image.links.original is added to the album. A response lacking that field
+     * is skipped and a failed lookup is logged, so one bad ID does not abort the rest.
+     *
+     * @param url imgur URL containing the comma-separated image IDs
+     * @return album with one image per resolved ID (empty if none resolved)
+     * @throws IOException declared for callers; per-image errors are caught here
+     */
+    public static ImgurAlbum getImgurSeries(URL url) throws IOException {
+        Pattern p = Pattern.compile("^.*imgur\\.com/([a-zA-Z0-9,]*).*$");
+        Matcher m = p.matcher(url.toExternalForm());
+        ImgurAlbum album = new ImgurAlbum(url);
+        if (m.matches()) {
+            String[] imageIds = m.group(1).split(",");
+            for (String imageId : imageIds) {
+                if (imageId.isEmpty()) {
+                    // split(",") yields empty IDs for leading or doubled commas; nothing to fetch
+                    continue;
+                }
+                logger.debug("Fetching image info for ID " + imageId);
+                try {
+                    JSONObject json = Http.url("https://api.imgur.com/2/image/" + imageId + ".json").getJSON();
+                    if (!json.has("image")) {
+                        continue;
+                    }
+                    JSONObject image = json.getJSONObject("image");
+                    if (!image.has("links")) {
+                        continue;
+                    }
+                    JSONObject links = image.getJSONObject("links");
+                    if (!links.has("original")) {
+                        continue;
+                    }
+                    String original = links.getString("original");
+                    ImgurImage theImage = new ImgurImage(new URL(original));
+                    album.addImage(theImage);
+                } catch (Exception e) {
+                    logger.error("Got exception while fetching imgur ID " + imageId, e);
+                }
+            }
+        }
+        return album;
+    }
+
     public static ImgurAlbum getImgurAlbum(URL url) throws IOException {
         logger.info(" Retrieving " + url.toExternalForm());
         Document doc = Jsoup.connect(url.toExternalForm())
@@ -362,7 +409,7 @@ public class ImgurRipper extends AlbumRipper {
 
     @Override
     public String getGID(URL url) throws MalformedURLException {
-        Pattern p = Pattern.compile("^https?://(m\\.)?imgur\\.com/a/([a-zA-Z0-9]{5,8}).*$");
+        Pattern p = Pattern.compile("^https?://(www\\.|m\\.)?imgur\\.com/a/([a-zA-Z0-9]{5,8}).*$");
         Matcher m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             // Imgur album
@@ -371,7 +418,7 @@ public class ImgurRipper extends AlbumRipper {
             this.url = new URL("http://imgur.com/a/" + gid);
             return gid;
         }
-        p = Pattern.compile("^https?://(m\\.)?imgur\\.com/gallery/([a-zA-Z0-9]{5,8}).*$");
+        p = Pattern.compile("^https?://(www\\.|m\\.)?imgur\\.com/gallery/([a-zA-Z0-9]{5,8}).*$");
         m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             // Imgur gallery
@@ -405,7 +452,7 @@ public class ImgurRipper extends AlbumRipper {
             albumType = ALBUM_TYPE.USER_ALBUM;
             return m.group(1) + "-" + m.group(2);
         }
-        p = Pattern.compile("^https?://(www\\.)?imgur\\.com/r/([a-zA-Z0-9\\-_]{3,})(/top|/new)?(/all|/year|/month|/week)?/?$");
+        p = Pattern.compile("^https?://(www\\.|m\\.)?imgur\\.com/r/([a-zA-Z0-9\\-_]{3,})(/top|/new)?(/all|/year|/month|/week)?/?$");
         m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             // Imgur subreddit aggregator
@@ -418,7 +465,7 @@ public class ImgurRipper extends AlbumRipper {
                 }
             return album;
         }
-        p = Pattern.compile("^https?://(i\\.)?imgur\\.com/([a-zA-Z0-9,]{5,}).*$");
+        p = Pattern.compile("^https?://(i\\.|www\\.|m\\.)?imgur\\.com/([a-zA-Z0-9,]{5,}).*$");
         m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             // Series of imgur images
diff --git a/src/main/java/com/rarchives/ripme/utils/RipUtils.java b/src/main/java/com/rarchives/ripme/utils/RipUtils.java
index 5a449b64..8e3d5f44 100644
--- a/src/main/java/com/rarchives/ripme/utils/RipUtils.java
+++ b/src/main/java/com/rarchives/ripme/utils/RipUtils.java
@@ -44,6 +44,19 @@ public class RipUtils {
             }
             return result;
         }
+        else if (url.getHost().endsWith("imgur.com") && url.toExternalForm().contains(",")) {
+            // Imgur image series.
+            try {
+                logger.debug("Fetching imgur series at " + url);
+                ImgurAlbum imgurAlbum = ImgurRipper.getImgurSeries(url);
+                for (ImgurImage imgurImage : imgurAlbum.images) {
+                    logger.debug("Got imgur image: " + imgurImage.url);
+                    result.add(imgurImage.url);
+                }
+            } catch (IOException e) {
+                logger.error("[!] Exception while loading album " + url, e);
+            }
+        }
         else if (url.getHost().endsWith("gfycat.com")) {
             try {
                 logger.debug("Fetching gfycat page " + url);
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ImgurRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ImgurRipperTest.java
index 2ce9c307..12eb4dcc 100644
--- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ImgurRipperTest.java
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ImgurRipperTest.java
@@ -39,9 +39,10 @@ public class ImgurRipperTest extends RippersTest {
         contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/horizontal#0"));
         contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/grid#0"));
         contentURLs.add(new URL("http://imgur.com/gallery/FmP2o")); // Gallery URL
+        contentURLs.add(new URL("http://imgur.com/758qD43,C6iVJex,bP7flAu,J3l85Ri,1U7fhu5,MbuAUCM,JF4vOXQ"));
         // Sometimes hangs up
         //contentURLs.add(new URL("http://imgur.com/r/nsfw_oc/top/all"));
-        contentURLs.add(new URL("http://imgur.com/a/bXQpH")); // Album with titles/descriptions
+        //contentURLs.add(new URL("http://imgur.com/a/bXQpH")); // Album with titles/descriptions
         for (URL url : contentURLs) {
             ImgurRipper ripper = new ImgurRipper(url);
             testRipper(ripper);