VSCO Ripper Single Image
Currently supports ripping only a single image. More study is needed before member profiles and collections can be ripped (maybe via JSON?).
This commit is contained in:
parent
9dbd566340
commit
64a6f9257b
@ -1,17 +1,23 @@
|
|||||||
package com.rarchives.ripme.ripper.rippers;
|
package com.rarchives.ripme.ripper.rippers;
|
||||||
|
|
||||||
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||||
import com.rarchives.ripme.ripper.AlbumRipper;
|
|
||||||
import com.rarchives.ripme.utils.Http;
|
import com.rarchives.ripme.utils.Http;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.logging.Level;
|
||||||
|
import java.util.logging.Logger;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
|
import org.jsoup.select.Elements;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* For ripping VSCO members' pages.
|
* For ripping VSCO members' pages.
|
||||||
@ -32,15 +38,15 @@ public class VscoRipper extends AbstractHTMLRipper{
|
|||||||
}
|
}
|
||||||
// Ignores personalized things (e.g. login, feed) and store page
|
// Ignores personalized things (e.g. login, feed) and store page
|
||||||
// Allows links to user profiles and links to images.
|
// Allows links to user profiles and links to images.
|
||||||
//TODO: Add support for journals and collections.
|
//@TODO: Add support for journals and collections.
|
||||||
String u = url.toExternalForm();
|
String u = url.toExternalForm();
|
||||||
return !u.contains("/store") ||
|
return !u.contains("/store/") ||
|
||||||
!u.contains("/feed") ||
|
!u.contains("/feed/") ||
|
||||||
!u.contains("/login") ||
|
!u.contains("/login/") ||
|
||||||
!u.contains("/journal") ||
|
!u.contains("/journal/") ||
|
||||||
!u.contains("/collection")||
|
!u.contains("/collection")||
|
||||||
u.contains("images") ||
|
!u.contains("/images/") ||
|
||||||
u.contains("media");
|
u.contains("/media/");
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -50,9 +56,79 @@ public class VscoRipper extends AbstractHTMLRipper{
|
|||||||
return url;
|
return url;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Recursion FTW
|
||||||
|
* @param page
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void rip() throws IOException {
|
public List<String> getURLsFromPage(Document page){
|
||||||
throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates.
|
List<String> toRip = new ArrayList<>();
|
||||||
|
//If user wanted to rip single image
|
||||||
|
if(url.toString().contains("/media/")){
|
||||||
|
try {
|
||||||
|
toRip.add(vscoImageToURL(url.toExternalForm()));
|
||||||
|
} catch (IOException ex) {
|
||||||
|
logger.debug("Failed to convert " + url.toString() + " to external form.");
|
||||||
|
}
|
||||||
|
|
||||||
|
}else{//want to rip a member profile
|
||||||
|
/*
|
||||||
|
String baseURL = "https://vsco.co";
|
||||||
|
|
||||||
|
|
||||||
|
//Find all the relative links, adds Base URL, then adds them to an ArrayList
|
||||||
|
List<URL> relativeLinks = new ArrayList<>();
|
||||||
|
Elements links = page.getElementsByTag("a");
|
||||||
|
|
||||||
|
|
||||||
|
for(Element link : links){
|
||||||
|
System.out.println(link.toString());
|
||||||
|
//if link includes "/media/", add it to the list
|
||||||
|
if(link.attr("href").contains("/media")){
|
||||||
|
try {
|
||||||
|
String relativeURL = vscoImageToURL(link.attr("href"));
|
||||||
|
toRip.add(baseURL + relativeURL);
|
||||||
|
} catch (IOException ex) {
|
||||||
|
logger.debug("Could not add \"" + link.toString() + "\" to list for ripping.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
logger.debug("Sorry, RipMe currently only supports ripping single images.");
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return toRip;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String vscoImageToURL(String url) throws IOException{
|
||||||
|
Document page = Jsoup.connect(url).userAgent(USER_AGENT)
|
||||||
|
.get();
|
||||||
|
//create Elements filled only with Elements with the "meta" tag.
|
||||||
|
Elements metaTags = page.getElementsByTag("meta");
|
||||||
|
String result = "";
|
||||||
|
|
||||||
|
for(Element metaTag : metaTags){
|
||||||
|
//find URL inside meta-tag with property of "og:image"
|
||||||
|
if(metaTag.attr("property").equals("og:image")){
|
||||||
|
String givenURL = metaTag.attr("content");
|
||||||
|
givenURL = givenURL.replaceAll("\\?h=[0-9]+", "");//replace the "?h=xxx" tag at the end of the URL (where each x is a number)
|
||||||
|
|
||||||
|
result = givenURL;
|
||||||
|
logger.debug("Found image URL: " + givenURL);
|
||||||
|
break;//immediatly stop after getting URL (there should only be 1 image to be downloaded)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//Means website changed, things need to be fixed.
|
||||||
|
if(result.isEmpty()){
|
||||||
|
logger.error("Could not find image URL at: " + url);
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -62,18 +138,28 @@ public class VscoRipper extends AbstractHTMLRipper{
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getGID(URL url) throws MalformedURLException {
|
public String getGID(URL url) throws MalformedURLException {
|
||||||
|
|
||||||
|
//Single Image
|
||||||
Pattern p = Pattern.compile("^https?://vsco\\.co/([a-zA-Z0-9]+)/media/([a-zA-Z0-9]+)");
|
Pattern p = Pattern.compile("^https?://vsco\\.co/([a-zA-Z0-9]+)/media/([a-zA-Z0-9]+)");
|
||||||
Matcher m = p.matcher(url.toExternalForm());
|
Matcher m = p.matcher(url.toExternalForm());
|
||||||
if (!m.matches()){
|
|
||||||
throw new MalformedURLException("Expected " + DOMAIN + " URL format: " +
|
|
||||||
"vsco.co/username/media/postNumber - got " + url + " instead");
|
|
||||||
|
|
||||||
}
|
if (m.matches()){
|
||||||
// Return the text contained between () in the regex
|
// Return the text contained between () in the regex
|
||||||
String user = m.group(1);
|
String user = m.group(1);
|
||||||
String imageNum = m.group(2);
|
String imageNum = m.group(2).substring(0, 5);//first 5 characters should be enough to make each rip unique
|
||||||
|
|
||||||
return user + "/" + imageNum;
|
return user + "/" + imageNum;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Member profile (usernames should all be different, so this should work).
|
||||||
|
p = Pattern.compile("^https?://vsco.co/([a-zA-Z0-9]+)/images/[0-9]+");
|
||||||
|
m = p.matcher(url.toExternalForm());
|
||||||
|
|
||||||
|
if(m.matches()){
|
||||||
|
String user = m.group(1);
|
||||||
|
return user;
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new MalformedURLException("Expected a URL to a single image or to a member profile, got " + url + " instead");
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -91,22 +177,6 @@ public class VscoRipper extends AbstractHTMLRipper{
|
|||||||
public Document getNextPage(Document doc) throws IOException {
|
public Document getNextPage(Document doc) throws IOException {
|
||||||
return super.getNextPage(doc);
|
return super.getNextPage(doc);
|
||||||
}
|
}
|
||||||
@Override
|
|
||||||
public List<String> getURLsFromPage(Document page) {
|
|
||||||
List<String> result = new ArrayList<>();
|
|
||||||
|
|
||||||
//get them from page
|
|
||||||
for(Element el : page.select("meta.og:image")){
|
|
||||||
//MUST replace im.vsco instead of just "im" because the URL to image could contain string "im"
|
|
||||||
result.add(
|
|
||||||
el.attr("content").replaceFirst("im.vsco", "images.vsco")); //sanitize
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void downloadURL(URL url, int index) {
|
public void downloadURL(URL url, int index) {
|
||||||
|
Loading…
Reference in New Issue
Block a user