Get proper imgur file extension when ripping reddit albums

This commit is contained in:
4pr0n 2014-05-22 19:41:13 -07:00
parent 1904f1dde7
commit b800200304

View File

@@ -9,7 +9,12 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractRipper;
import com.rarchives.ripme.ripper.rippers.ImgurRipper; import com.rarchives.ripme.ripper.rippers.ImgurRipper;
import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurAlbum; import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurAlbum;
import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurImage; import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurImage;
@@ -31,6 +36,7 @@ public class RipUtils {
return result; return result;
} catch (IOException e) { } catch (IOException e) {
logger.error("[!] Exception while loading album " + url, e); logger.error("[!] Exception while loading album " + url, e);
return result;
} }
} }
@@ -51,10 +57,18 @@ public class RipUtils {
if(url.getHost().equals("imgur.com") || if(url.getHost().equals("imgur.com") ||
url.getHost().equals("m.imgur.com")){ url.getHost().equals("m.imgur.com")){
try { try {
result.add(new URL(url.toExternalForm() + ".png")); // Fetch the page
Document doc = Jsoup.connect(url.toExternalForm())
.userAgent(AbstractRipper.USER_AGENT)
.get();
for (Element el : doc.select("meta")) {
if (el.attr("property").equals("og:image")) {
result.add(new URL(el.attr("content")));
return result; return result;
} catch (MalformedURLException ex) { }
logger.error("[!] Exception while loading album " + url, ex); }
} catch (IOException ex) {
logger.error("[!] Error", ex);
} }
} }