2014-03-11 09:29:59 +01:00
|
|
|
package com.rarchives.ripme.utils;
|
|
|
|
|
|
|
|
import java.io.IOException;
|
|
|
|
import java.net.MalformedURLException;
|
|
|
|
import java.net.URL;
|
|
|
|
import java.util.ArrayList;
|
2015-02-10 07:18:09 +01:00
|
|
|
import java.util.Arrays;
|
2014-03-11 09:29:59 +01:00
|
|
|
import java.util.List;
|
|
|
|
import java.util.regex.Matcher;
|
|
|
|
import java.util.regex.Pattern;
|
|
|
|
|
2015-02-10 07:18:09 +01:00
|
|
|
import org.apache.commons.lang.math.NumberUtils;
|
2014-03-11 09:29:59 +01:00
|
|
|
import org.apache.log4j.Logger;
|
2014-05-23 04:41:13 +02:00
|
|
|
import org.jsoup.Jsoup;
|
|
|
|
import org.jsoup.nodes.Document;
|
|
|
|
import org.jsoup.nodes.Element;
|
2014-03-11 09:29:59 +01:00
|
|
|
|
2014-05-23 04:41:13 +02:00
|
|
|
import com.rarchives.ripme.ripper.AbstractRipper;
|
2014-03-11 09:29:59 +01:00
|
|
|
import com.rarchives.ripme.ripper.rippers.ImgurRipper;
|
2014-04-13 01:53:49 +02:00
|
|
|
import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurAlbum;
|
|
|
|
import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurImage;
|
2015-02-10 07:18:09 +01:00
|
|
|
import com.rarchives.ripme.ripper.rippers.VidbleRipper;
|
2014-06-25 11:03:47 +02:00
|
|
|
import com.rarchives.ripme.ripper.rippers.video.GfycatRipper;
|
2014-03-11 09:29:59 +01:00
|
|
|
|
|
|
|
public class RipUtils {
|
|
|
|
private static final Logger logger = Logger.getLogger(RipUtils.class);
|
|
|
|
|
|
|
|
public static List<URL> getFilesFromURL(URL url) {
|
|
|
|
List<URL> result = new ArrayList<URL>();
|
|
|
|
|
2014-07-20 09:45:40 +02:00
|
|
|
logger.debug("Checking " + url);
|
2014-03-11 09:29:59 +01:00
|
|
|
// Imgur album
|
2014-06-25 11:03:47 +02:00
|
|
|
if ((url.getHost().endsWith("imgur.com"))
|
2014-03-13 20:14:51 +01:00
|
|
|
&& url.toExternalForm().contains("imgur.com/a/")) {
|
2014-03-11 09:29:59 +01:00
|
|
|
try {
|
2014-04-13 01:53:49 +02:00
|
|
|
ImgurAlbum imgurAlbum = ImgurRipper.getImgurAlbum(url);
|
|
|
|
for (ImgurImage imgurImage : imgurAlbum.images) {
|
|
|
|
result.add(imgurImage.url);
|
|
|
|
}
|
2014-03-11 09:29:59 +01:00
|
|
|
} catch (IOException e) {
|
|
|
|
logger.error("[!] Exception while loading album " + url, e);
|
|
|
|
}
|
2014-06-25 11:03:47 +02:00
|
|
|
return result;
|
|
|
|
}
|
|
|
|
else if (url.getHost().endsWith("gfycat.com")) {
|
|
|
|
try {
|
|
|
|
String videoURL = GfycatRipper.getVideoURL(url);
|
|
|
|
result.add(new URL(videoURL));
|
|
|
|
} catch (IOException e) {
|
|
|
|
// Do nothing
|
|
|
|
logger.warn("Exception while retrieving gfycat page:", e);
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
else if (url.toExternalForm().contains("vidble.com/album/")) {
|
|
|
|
try {
|
|
|
|
result.addAll(VidbleRipper.getURLsFromPage(url));
|
|
|
|
} catch (IOException e) {
|
|
|
|
// Do nothing
|
|
|
|
logger.warn("Exception while retrieving vidble page:", e);
|
|
|
|
}
|
|
|
|
return result;
|
2014-03-11 09:29:59 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Direct link to image
|
2014-07-20 09:45:40 +02:00
|
|
|
Pattern p = Pattern.compile("(https?://[a-zA-Z0-9\\-\\.]+\\.[a-zA-Z]{2,3}(/\\S*)\\.(jpg|jpeg|gif|png|mp4)(\\?.*)?)");
|
2014-03-11 09:29:59 +01:00
|
|
|
Matcher m = p.matcher(url.toExternalForm());
|
|
|
|
if (m.matches()) {
|
|
|
|
try {
|
|
|
|
URL singleURL = new URL(m.group(1));
|
|
|
|
result.add(singleURL);
|
|
|
|
return result;
|
|
|
|
} catch (MalformedURLException e) {
|
|
|
|
logger.error("[!] Not a valid URL: '" + url + "'", e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-07-20 09:45:40 +02:00
|
|
|
if (url.getHost().equals("imgur.com") ||
|
2014-03-13 20:14:51 +01:00
|
|
|
url.getHost().equals("m.imgur.com")){
|
|
|
|
try {
|
2014-05-23 04:41:13 +02:00
|
|
|
// Fetch the page
|
|
|
|
Document doc = Jsoup.connect(url.toExternalForm())
|
|
|
|
.userAgent(AbstractRipper.USER_AGENT)
|
|
|
|
.get();
|
|
|
|
for (Element el : doc.select("meta")) {
|
2015-01-11 09:40:56 +01:00
|
|
|
if (el.attr("name").equals("twitter:image:src")) {
|
2014-05-23 04:41:13 +02:00
|
|
|
result.add(new URL(el.attr("content")));
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} catch (IOException ex) {
|
|
|
|
logger.error("[!] Error", ex);
|
2014-03-13 20:14:51 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2014-03-11 09:29:59 +01:00
|
|
|
logger.error("[!] Unable to rip URL: " + url);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
public static Pattern getURLRegex() {
|
|
|
|
return Pattern.compile("(https?://[a-zA-Z0-9\\-\\.]+\\.[a-zA-Z]{2,3}(/\\S*))");
|
|
|
|
}
|
2015-02-10 07:18:09 +01:00
|
|
|
|
|
|
|
public static String urlFromDirectoryName(String dir) {
|
|
|
|
String url = null;
|
|
|
|
if (url == null) url = urlFromImgurDirectoryName(dir);
|
|
|
|
if (url == null) url = urlFromImagefapDirectoryName(dir);
|
|
|
|
if (url == null) url = urlFromDeviantartDirectoryName(dir);
|
|
|
|
if (url == null) url = urlFromRedditDirectoryName(dir);
|
|
|
|
if (url == null) url = urlFromSiteDirectoryName(dir, "bfcakes", "http://www.bcfakes.com/celebritylist/", "");
|
|
|
|
if (url == null) url = urlFromSiteDirectoryName(dir, "butttoucher", "http://butttoucher.com/users/", "");
|
|
|
|
if (url == null) url = urlFromSiteDirectoryName(dir, "cheeby", "http://cheeby.com/u/", "");
|
|
|
|
if (url == null) url = urlFromSiteDirectoryName(dir, "datwin", "http://datw.in/", "");
|
|
|
|
if (url == null) url = urlFromSiteDirectoryName(dir, "drawcrowd", "http://drawcrowd.com/", "");
|
|
|
|
if (url == null) url = urlFromSiteDirectoryName(dir.replace("-", "/"), "ehentai", "http://g.e-hentai.org/g/", "");
|
|
|
|
if (url == null) url = urlFromSiteDirectoryName(dir, "8muses", "http://www.8muses.com/index/category/", "");
|
|
|
|
if (url == null) url = urlFromSiteDirectoryName(dir, "fapproved", "http://fapproved.com/users/", "");
|
|
|
|
if (url == null) url = urlFromSiteDirectoryName(dir, "vinebox", "http://finebox.co/u/", "");
|
|
|
|
/*
|
|
|
|
if (url == null) url = urlFromSiteDirectoryName(dir, "", "", "");
|
|
|
|
if (url == null) url = urlFromSiteDirectoryName(dir, "", "", "");
|
|
|
|
if (url == null) url = urlFromSiteDirectoryName(dir, "", "", "");
|
|
|
|
if (url == null) url = urlFromSiteDirectoryName(dir, "", "", "");
|
|
|
|
if (url == null) url = urlFromSiteDirectoryName(dir, "", "", "");
|
|
|
|
if (url == null) url = urlFromSiteDirectoryName(dir, "", "", "");
|
|
|
|
if (url == null) url = urlFromSiteDirectoryName(dir, "", "", "");
|
|
|
|
if (url == null) url = urlFromSiteDirectoryName(dir, "", "", "");
|
|
|
|
if (url == null) url = urlFromSiteDirectoryName(dir, "", "", "");
|
|
|
|
if (url == null) url = urlFromSiteDirectoryName(dir, "", "", "");
|
|
|
|
*/
|
|
|
|
return url;
|
|
|
|
}
|
|
|
|
|
|
|
|
private static String urlFromSiteDirectoryName(String dir, String site, String before, String after) {
|
|
|
|
if (!dir.startsWith(site + "_")) {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
dir = dir.substring((site + "_").length());
|
|
|
|
return before + dir + after;
|
|
|
|
}
|
|
|
|
|
|
|
|
private static String urlFromRedditDirectoryName(String dir) {
|
|
|
|
if (!dir.startsWith("reddit_")) {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
String url = null;
|
|
|
|
String[] fields = dir.split("_");
|
|
|
|
if (fields[0].equals("sub")) {
|
|
|
|
url = "http://reddit.com/r/" + dir;
|
|
|
|
}
|
|
|
|
else if (fields[0].equals("user")) {
|
|
|
|
url = "http://reddit.com/user/" + dir;
|
|
|
|
}
|
|
|
|
else if (fields[0].equals("post")) {
|
|
|
|
url = "http://reddit.com/comments/" + dir;
|
|
|
|
}
|
|
|
|
return url;
|
|
|
|
}
|
|
|
|
|
|
|
|
private static String urlFromImagefapDirectoryName(String dir) {
|
|
|
|
if (!dir.startsWith("imagefap")) {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
String url = null;
|
|
|
|
dir = dir.substring("imagefap_".length());
|
|
|
|
if (NumberUtils.isDigits(dir)) {
|
|
|
|
url = "http://www.imagefap.com/gallery.php?gid=" + dir;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
url = "http://www.imagefap.com/gallery.php?pgid=" + dir;
|
|
|
|
}
|
|
|
|
return url;
|
|
|
|
}
|
|
|
|
|
|
|
|
private static String urlFromDeviantartDirectoryName(String dir) {
|
|
|
|
if (!dir.startsWith("deviantart")) {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
dir = dir.substring("deviantart_".length());
|
|
|
|
String url = null;
|
|
|
|
if (!dir.contains("_")) {
|
|
|
|
url = "http://" + dir + ".deviantart.com/";
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
String[] fields = dir.split("_");
|
|
|
|
url = "http://" + fields[0] + ".deviantart.com/gallery/" + fields[1];
|
|
|
|
}
|
|
|
|
return url;
|
|
|
|
}
|
|
|
|
|
|
|
|
private static String urlFromImgurDirectoryName(String dir) {
|
|
|
|
if (!dir.startsWith("imgur_")) {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
if (dir.contains(" ")) {
|
|
|
|
dir = dir.substring(0, dir.indexOf(" "));
|
|
|
|
}
|
|
|
|
List<String> fields = Arrays.asList(dir.split("_"));
|
|
|
|
String album = fields.get(1);
|
|
|
|
String url = "http://";
|
|
|
|
if ( (fields.contains("top") || fields.contains("new"))
|
|
|
|
&& (fields.contains("year") || fields.contains("month") || fields.contains("week") || fields.contains("all"))
|
|
|
|
) {
|
|
|
|
// Subreddit
|
|
|
|
fields.remove(0); // "imgur"
|
|
|
|
String sub = "";
|
|
|
|
while (fields.size() > 2) {
|
|
|
|
if (!sub.equals("")) {
|
|
|
|
sub += "_";
|
|
|
|
}
|
|
|
|
sub = fields.remove(0); // Subreddit that may contain "_"
|
|
|
|
}
|
|
|
|
url += "imgur.com/r/" + sub + "/";
|
|
|
|
url += fields.remove(0) + "/";
|
|
|
|
url += fields.remove(0);
|
|
|
|
}
|
|
|
|
else if (album.contains("-")) {
|
|
|
|
// Series of images
|
|
|
|
url += "imgur.com/" + album.replaceAll("-", ",");
|
|
|
|
}
|
|
|
|
else if (album.length() == 5 || album.length() == 6) {
|
|
|
|
// Album
|
|
|
|
url += "imgur.com/a/" + album;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
// User account
|
|
|
|
url += album + ".imgur.com/";
|
|
|
|
if (fields.size() > 2) {
|
|
|
|
url += fields.get(2);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return url;
|
|
|
|
}
|
2014-03-11 09:29:59 +01:00
|
|
|
}
|