diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/VscoRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/VscoRipper.java new file mode 100644 index 00000000..5665ee84 --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/VscoRipper.java @@ -0,0 +1,186 @@ +package com.rarchives.ripme.ripper.rippers; + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.jsoup.Jsoup; + +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +/** + * For ripping VSCO members' pages. + */ +public class VscoRipper extends AbstractHTMLRipper{ + + private static final String DOMAIN = "vsco.co", + HOST = "vsco"; + + public VscoRipper(URL url) throws IOException{ + super(url); + } + + @Override + public boolean canRip(URL url) { + if (!url.getHost().endsWith(DOMAIN)) { + return false; + } + // Ignores personalized things (e.g. login, feed) and store page + // Allows links to user profiles and links to images. + //@TODO: Add support for journals and collections. + String u = url.toExternalForm(); + return !u.contains("/store/") || + !u.contains("/feed/") || + !u.contains("/login/") || + !u.contains("/journal/") || + !u.contains("/collection")|| + !u.contains("/images/") || + u.contains("/media/"); + + } + + @Override + public URL sanitizeURL(URL url) throws MalformedURLException { + //no sanitization needed. + return url; + } + + /** + * Recursion FTW + * @param page + * @return + */ + @Override + public List getURLsFromPage(Document page){ + List toRip = new ArrayList<>(); + //If user wanted to rip single image + if(url.toString().contains("/media/")){ + try { + toRip.add(vscoImageToURL(url.toExternalForm())); + } catch (IOException ex) { + logger.debug("Failed to convert " + url.toString() + " to external form."); + } + + }else{//want to rip a member profile + /* + String baseURL = "https://vsco.co"; + + + //Find all the relative links, adds Base URL, then adds them to an ArrayList + List relativeLinks = new ArrayList<>(); + Elements links = page.getElementsByTag("a"); + + + for(Element link : links){ + System.out.println(link.toString()); + //if link includes "/media/", add it to the list + if(link.attr("href").contains("/media")){ + try { + String relativeURL = vscoImageToURL(link.attr("href")); + toRip.add(baseURL + relativeURL); + } catch (IOException ex) { + logger.debug("Could not add \"" + link.toString() + "\" to list for ripping."); + } + } + } + */ + logger.debug("Sorry, RipMe currently only supports ripping single images."); + + + } + + return toRip; + } + + private String vscoImageToURL(String url) throws IOException{ + Document page = Jsoup.connect(url).userAgent(USER_AGENT) + .get(); + //create Elements filled only with Elements with the "meta" tag. + Elements metaTags = page.getElementsByTag("meta"); + String result = ""; + + for(Element metaTag : metaTags){ + //find URL inside meta-tag with property of "og:image" + if(metaTag.attr("property").equals("og:image")){ + String givenURL = metaTag.attr("content"); + givenURL = givenURL.replaceAll("\\?h=[0-9]+", "");//replace the "?h=xxx" tag at the end of the URL (where each x is a number) + + result = givenURL; + logger.debug("Found image URL: " + givenURL); + break;//immediatly stop after getting URL (there should only be 1 image to be downloaded) + } + } + + //Means website changed, things need to be fixed. + if(result.isEmpty()){ + logger.error("Could not find image URL at: " + url); + } + + return result; + + } + + @Override + public String getHost() { + return HOST; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + + //Single Image + Pattern p = Pattern.compile("^https?://vsco\\.co/([a-zA-Z0-9]+)/media/([a-zA-Z0-9]+)"); + Matcher m = p.matcher(url.toExternalForm()); + + if (m.matches()){ + // Return the text contained between () in the regex + String user = m.group(1); + String imageNum = m.group(2).substring(0, 5);//first 5 characters should be enough to make each rip unique + return user + "/" + imageNum; + } + + //Member profile (Usernames should all be different, so this should work. + p = Pattern.compile("^https?://vsco.co/([a-zA-Z0-9]+)/images/[0-9]+"); + m = p.matcher(url.toExternalForm()); + + if(m.matches()){ + String user = m.group(1); + return user; + } + + throw new MalformedURLException("Expected a URL to a single image or to a member profile, got " + url + " instead"); + + } + + @Override + public String getDomain() { + return DOMAIN; + } + + @Override + public Document getFirstPage() throws IOException { + return Http.url(url).get(); + } + + @Override + public Document getNextPage(Document doc) throws IOException { + return super.getNextPage(doc); + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } + +} diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/VscoRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/VscoRipperTest.java new file mode 100644 index 00000000..15a5eeef --- /dev/null +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/VscoRipperTest.java @@ -0,0 +1,23 @@ +package com.rarchives.ripme.tst.ripper.rippers; + +import com.rarchives.ripme.ripper.rippers.VscoRipper; +import java.io.IOException; +import java.net.URL; + +public class VscoRipperTest extends RippersTest { + + + public void testSingleImageRip() throws IOException{ + VscoRipper ripper = new VscoRipper(new URL("https://vsco.co/minijello/media/571cd612542220261a123441")); + testRipper(ripper); + } + + public void testGetGID() throws IOException{ + URL url = new URL("https://vsco.co/minijello/media/571cd612542220261a123441"); + + VscoRipper ripper = new VscoRipper(url); + + assertEquals("Failed to get GID", "minijello/571cd", ripper.getGID(url)); + } + +}