Fetch imgur series (comma-separated images) in Reddit ripper.
Closes #218
This commit is contained in:
parent
0ba092813c
commit
9fd7bf7663
@ -159,6 +159,39 @@ public class ImgurRipper extends AlbumRipper {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static ImgurAlbum getImgurSeries(URL url) throws IOException {
|
||||||
|
Pattern p = Pattern.compile("^.*imgur\\.com/([a-zA-Z0-9,]*).*$");
|
||||||
|
Matcher m = p.matcher(url.toExternalForm());
|
||||||
|
ImgurAlbum album = new ImgurAlbum(url);
|
||||||
|
if (m.matches()) {
|
||||||
|
String[] imageIds = m.group(1).split(",");
|
||||||
|
for (String imageId : imageIds) {
|
||||||
|
// TODO: Fetch image with ID imageId
|
||||||
|
logger.debug("Fetching image info for ID " + imageId);;
|
||||||
|
try {
|
||||||
|
JSONObject json = Http.url("https://api.imgur.com/2/image/" + imageId + ".json").getJSON();
|
||||||
|
if (!json.has("image")) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
JSONObject image = json.getJSONObject("image");
|
||||||
|
if (!image.has("links")) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
JSONObject links = image.getJSONObject("links");
|
||||||
|
if (!links.has("original")) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
String original = links.getString("original");
|
||||||
|
ImgurImage theImage = new ImgurImage(new URL(original));
|
||||||
|
album.addImage(theImage);
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.error("Got exception while fetching imgur ID " + imageId, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return album;
|
||||||
|
}
|
||||||
|
|
||||||
public static ImgurAlbum getImgurAlbum(URL url) throws IOException {
|
public static ImgurAlbum getImgurAlbum(URL url) throws IOException {
|
||||||
logger.info(" Retrieving " + url.toExternalForm());
|
logger.info(" Retrieving " + url.toExternalForm());
|
||||||
Document doc = Jsoup.connect(url.toExternalForm())
|
Document doc = Jsoup.connect(url.toExternalForm())
|
||||||
@ -362,7 +395,7 @@ public class ImgurRipper extends AlbumRipper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getGID(URL url) throws MalformedURLException {
|
public String getGID(URL url) throws MalformedURLException {
|
||||||
Pattern p = Pattern.compile("^https?://(m\\.)?imgur\\.com/a/([a-zA-Z0-9]{5,8}).*$");
|
Pattern p = Pattern.compile("^https?://(www\\.|m\\.)?imgur\\.com/a/([a-zA-Z0-9]{5,8}).*$");
|
||||||
Matcher m = p.matcher(url.toExternalForm());
|
Matcher m = p.matcher(url.toExternalForm());
|
||||||
if (m.matches()) {
|
if (m.matches()) {
|
||||||
// Imgur album
|
// Imgur album
|
||||||
@ -371,7 +404,7 @@ public class ImgurRipper extends AlbumRipper {
|
|||||||
this.url = new URL("http://imgur.com/a/" + gid);
|
this.url = new URL("http://imgur.com/a/" + gid);
|
||||||
return gid;
|
return gid;
|
||||||
}
|
}
|
||||||
p = Pattern.compile("^https?://(m\\.)?imgur\\.com/gallery/([a-zA-Z0-9]{5,8}).*$");
|
p = Pattern.compile("^https?://(www\\.|m\\.)?imgur\\.com/gallery/([a-zA-Z0-9]{5,8}).*$");
|
||||||
m = p.matcher(url.toExternalForm());
|
m = p.matcher(url.toExternalForm());
|
||||||
if (m.matches()) {
|
if (m.matches()) {
|
||||||
// Imgur gallery
|
// Imgur gallery
|
||||||
@ -405,7 +438,7 @@ public class ImgurRipper extends AlbumRipper {
|
|||||||
albumType = ALBUM_TYPE.USER_ALBUM;
|
albumType = ALBUM_TYPE.USER_ALBUM;
|
||||||
return m.group(1) + "-" + m.group(2);
|
return m.group(1) + "-" + m.group(2);
|
||||||
}
|
}
|
||||||
p = Pattern.compile("^https?://(www\\.)?imgur\\.com/r/([a-zA-Z0-9\\-_]{3,})(/top|/new)?(/all|/year|/month|/week)?/?$");
|
p = Pattern.compile("^https?://(www\\.|m\\.)?imgur\\.com/r/([a-zA-Z0-9\\-_]{3,})(/top|/new)?(/all|/year|/month|/week)?/?$");
|
||||||
m = p.matcher(url.toExternalForm());
|
m = p.matcher(url.toExternalForm());
|
||||||
if (m.matches()) {
|
if (m.matches()) {
|
||||||
// Imgur subreddit aggregator
|
// Imgur subreddit aggregator
|
||||||
@ -418,7 +451,7 @@ public class ImgurRipper extends AlbumRipper {
|
|||||||
}
|
}
|
||||||
return album;
|
return album;
|
||||||
}
|
}
|
||||||
p = Pattern.compile("^https?://(i\\.)?imgur\\.com/([a-zA-Z0-9,]{5,}).*$");
|
p = Pattern.compile("^https?://(i\\.|www\\.|m\\.)?imgur\\.com/([a-zA-Z0-9,]{5,}).*$");
|
||||||
m = p.matcher(url.toExternalForm());
|
m = p.matcher(url.toExternalForm());
|
||||||
if (m.matches()) {
|
if (m.matches()) {
|
||||||
// Series of imgur images
|
// Series of imgur images
|
||||||
|
@ -44,6 +44,19 @@ public class RipUtils {
|
|||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
else if (url.getHost().endsWith("imgur.com") && url.toExternalForm().contains(",")) {
|
||||||
|
// Imgur image series.
|
||||||
|
try {
|
||||||
|
logger.debug("Fetching imgur series at " + url);
|
||||||
|
ImgurAlbum imgurAlbum = ImgurRipper.getImgurSeries(url);
|
||||||
|
for (ImgurImage imgurImage : imgurAlbum.images) {
|
||||||
|
logger.debug("Got imgur image: " + imgurImage.url);
|
||||||
|
result.add(imgurImage.url);
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
logger.error("[!] Exception while loading album " + url, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
else if (url.getHost().endsWith("gfycat.com")) {
|
else if (url.getHost().endsWith("gfycat.com")) {
|
||||||
try {
|
try {
|
||||||
logger.debug("Fetching gfycat page " + url);
|
logger.debug("Fetching gfycat page " + url);
|
||||||
|
@ -39,9 +39,10 @@ public class ImgurRipperTest extends RippersTest {
|
|||||||
contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/horizontal#0"));
|
contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/horizontal#0"));
|
||||||
contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/grid#0"));
|
contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/grid#0"));
|
||||||
contentURLs.add(new URL("http://imgur.com/gallery/FmP2o")); // Gallery URL
|
contentURLs.add(new URL("http://imgur.com/gallery/FmP2o")); // Gallery URL
|
||||||
|
contentURLs.add(new URL("http://imgur.com/758qD43,C6iVJex,bP7flAu,J3l85Ri,1U7fhu5,MbuAUCM,JF4vOXQ"));
|
||||||
// Sometimes hangs up
|
// Sometimes hangs up
|
||||||
//contentURLs.add(new URL("http://imgur.com/r/nsfw_oc/top/all"));
|
//contentURLs.add(new URL("http://imgur.com/r/nsfw_oc/top/all"));
|
||||||
contentURLs.add(new URL("http://imgur.com/a/bXQpH")); // Album with titles/descriptions
|
//contentURLs.add(new URL("http://imgur.com/a/bXQpH")); // Album with titles/descriptions
|
||||||
for (URL url : contentURLs) {
|
for (URL url : contentURLs) {
|
||||||
ImgurRipper ripper = new ImgurRipper(url);
|
ImgurRipper ripper = new ImgurRipper(url);
|
||||||
testRipper(ripper);
|
testRipper(ripper);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user