2014-03-11 09:29:59 +01:00
|
|
|
package com.rarchives.ripme.utils;
|
|
|
|
|
|
|
|
import java.io.IOException;
|
|
|
|
import java.net.MalformedURLException;
|
|
|
|
import java.net.URL;
|
|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.List;
|
|
|
|
import java.util.regex.Matcher;
|
|
|
|
import java.util.regex.Pattern;
|
|
|
|
|
|
|
|
import org.apache.log4j.Logger;
|
2014-05-23 04:41:13 +02:00
|
|
|
import org.jsoup.Jsoup;
|
|
|
|
import org.jsoup.nodes.Document;
|
|
|
|
import org.jsoup.nodes.Element;
|
2014-03-11 09:29:59 +01:00
|
|
|
|
2014-05-23 04:41:13 +02:00
|
|
|
import com.rarchives.ripme.ripper.AbstractRipper;
|
2014-03-11 09:29:59 +01:00
|
|
|
import com.rarchives.ripme.ripper.rippers.ImgurRipper;
|
2014-06-25 11:03:47 +02:00
|
|
|
import com.rarchives.ripme.ripper.rippers.VidbleRipper;
|
2014-04-13 01:53:49 +02:00
|
|
|
import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurAlbum;
|
|
|
|
import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurImage;
|
2014-06-25 11:03:47 +02:00
|
|
|
import com.rarchives.ripme.ripper.rippers.video.GfycatRipper;
|
2014-03-11 09:29:59 +01:00
|
|
|
|
|
|
|
public class RipUtils {
|
|
|
|
private static final Logger logger = Logger.getLogger(RipUtils.class);
|
|
|
|
|
|
|
|
public static List<URL> getFilesFromURL(URL url) {
|
|
|
|
List<URL> result = new ArrayList<URL>();
|
|
|
|
|
|
|
|
// Imgur album
|
2014-06-25 11:03:47 +02:00
|
|
|
if ((url.getHost().endsWith("imgur.com"))
|
2014-03-13 20:14:51 +01:00
|
|
|
&& url.toExternalForm().contains("imgur.com/a/")) {
|
2014-03-11 09:29:59 +01:00
|
|
|
try {
|
2014-04-13 01:53:49 +02:00
|
|
|
ImgurAlbum imgurAlbum = ImgurRipper.getImgurAlbum(url);
|
|
|
|
for (ImgurImage imgurImage : imgurAlbum.images) {
|
|
|
|
result.add(imgurImage.url);
|
|
|
|
}
|
2014-03-11 09:29:59 +01:00
|
|
|
} catch (IOException e) {
|
|
|
|
logger.error("[!] Exception while loading album " + url, e);
|
|
|
|
}
|
2014-06-25 11:03:47 +02:00
|
|
|
return result;
|
|
|
|
}
|
|
|
|
else if (url.getHost().endsWith("gfycat.com")) {
|
|
|
|
try {
|
|
|
|
String videoURL = GfycatRipper.getVideoURL(url);
|
|
|
|
result.add(new URL(videoURL));
|
|
|
|
} catch (IOException e) {
|
|
|
|
// Do nothing
|
|
|
|
logger.warn("Exception while retrieving gfycat page:", e);
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
else if (url.toExternalForm().contains("vidble.com/album/")) {
|
|
|
|
try {
|
|
|
|
result.addAll(VidbleRipper.getURLsFromPage(url));
|
|
|
|
} catch (IOException e) {
|
|
|
|
// Do nothing
|
|
|
|
logger.warn("Exception while retrieving vidble page:", e);
|
|
|
|
}
|
|
|
|
return result;
|
2014-03-11 09:29:59 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Direct link to image
|
|
|
|
Pattern p = Pattern.compile("(https?://[a-zA-Z0-9\\-\\.]+\\.[a-zA-Z]{2,3}(/\\S*)\\.(jpg|jpeg|gif|png|mp4))");
|
|
|
|
Matcher m = p.matcher(url.toExternalForm());
|
|
|
|
if (m.matches()) {
|
|
|
|
try {
|
|
|
|
URL singleURL = new URL(m.group(1));
|
|
|
|
result.add(singleURL);
|
|
|
|
return result;
|
|
|
|
} catch (MalformedURLException e) {
|
|
|
|
logger.error("[!] Not a valid URL: '" + url + "'", e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-03-13 20:14:51 +01:00
|
|
|
if(url.getHost().equals("imgur.com") ||
|
|
|
|
url.getHost().equals("m.imgur.com")){
|
|
|
|
try {
|
2014-05-23 04:41:13 +02:00
|
|
|
// Fetch the page
|
|
|
|
Document doc = Jsoup.connect(url.toExternalForm())
|
|
|
|
.userAgent(AbstractRipper.USER_AGENT)
|
|
|
|
.get();
|
|
|
|
for (Element el : doc.select("meta")) {
|
|
|
|
if (el.attr("property").equals("og:image")) {
|
|
|
|
result.add(new URL(el.attr("content")));
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} catch (IOException ex) {
|
|
|
|
logger.error("[!] Error", ex);
|
2014-03-13 20:14:51 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2014-03-11 09:29:59 +01:00
|
|
|
logger.error("[!] Unable to rip URL: " + url);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
public static Pattern getURLRegex() {
|
|
|
|
return Pattern.compile("(https?://[a-zA-Z0-9\\-\\.]+\\.[a-zA-Z]{2,3}(/\\S*))");
|
|
|
|
}
|
|
|
|
}
|