diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ArtStationRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ArtStationRipper.java new file mode 100644 index 00000000..aeb1991d --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ArtStationRipper.java @@ -0,0 +1,258 @@ +package com.rarchives.ripme.ripper.rippers; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import com.rarchives.ripme.ripper.AbstractJSONRipper; +import com.rarchives.ripme.utils.Http; + +import org.json.JSONObject; + +public class ArtStationRipper extends AbstractJSONRipper { + enum URL_TYPE { + SINGLE_PROJECT, USER_PORTFOLIO, UNKNOWN + } + + private ParsedURL albumURL; + private String projectName; + private Integer projectIndex; + + public ArtStationRipper(URL url) throws IOException { + super(url); + } + + @Override + protected String getDomain() { + return "artstation.com"; + } + + @Override + public String getHost() { + return "ArtStation"; + } + + @Override + public String getGID(URL url) throws MalformedURLException { + JSONObject groupData; + + // Parse URL and store for later use + albumURL = parseURL(url); + + if (albumURL.getType() == URL_TYPE.SINGLE_PROJECT) { + // URL points to single project, use project title as GID + try { + groupData = Http.url(albumURL.getLocation()).getJSON(); + } catch (IOException e) { + throw new MalformedURLException("Couldn't load JSON from " + albumURL.getLocation()); + } + return groupData.getString("title"); + } + + if (albumURL.getType() == URL_TYPE.USER_PORTFOLIO) { + // URL points to user portfolio, use user's full name as GID + String userInfoURL = "https://www.artstation.com/users/" + albumURL.getID() + "/quick.json"; + try { + groupData = Http.url(userInfoURL).getJSON(); + } catch (IOException e) { + throw new MalformedURLException("Couldn't load JSON from " + userInfoURL); + } + return groupData.getString("full_name"); + } + + // No JSON found in the URL entered, can't rip + throw new MalformedURLException( + "Expected URL to an ArtStation project or user profile - got " + url + " instead"); + } + + @Override + protected JSONObject getFirstPage() throws IOException { + if (albumURL.getType() == URL_TYPE.SINGLE_PROJECT) { + // URL points to JSON of a single project, just return it + return Http.url(albumURL.getLocation()).getJSON(); + } + + if (albumURL.getType() == URL_TYPE.USER_PORTFOLIO) { + // URL points to JSON of a list of projects, load it to parse individual + // projects + JSONObject albumContent = Http.url(albumURL.getLocation()).getJSON(); + + if (albumContent.getInt("total_count") > 0) { + // Get JSON of the first project and return it + JSONObject projectInfo = albumContent.getJSONArray("data").getJSONObject(0); + ParsedURL projectURL = parseURL(new URL(projectInfo.getString("permalink"))); + return Http.url(projectURL.getLocation()).getJSON(); + } + } + + throw new IOException("URL specified points to an user with empty portfolio"); + } + + @Override + protected JSONObject getNextPage(JSONObject doc) throws IOException { + if (albumURL.getType() == URL_TYPE.USER_PORTFOLIO) { + // Initialize the index if it hasn't been initialized already + if (projectIndex == null) { + projectIndex = 1; + } + + JSONObject albumContent = Http.url(albumURL.getLocation()).getJSON(); + + if (albumContent.getInt("total_count") > projectIndex) { + // Get JSON of the next project and return it + JSONObject projectInfo = albumContent.getJSONArray("data").getJSONObject(projectIndex); + ParsedURL projectURL = parseURL(new URL(projectInfo.getString("permalink"))); + projectIndex++; + return Http.url(projectURL.getLocation()).getJSON(); + } + + throw new IOException("No more projects"); + } + + throw new IOException("Downloading a single project"); + } + + @Override + protected List getURLsFromJSON(JSONObject json) { + List assetURLs = new ArrayList<>(); + JSONObject currentObject; + + // Update project name variable from JSON data. Used by downloadURL() to create + // subfolders when input URL is URL_TYPE.USER_PORTFOLIO + projectName = json.getString("title"); + + for (int i = 0; i < json.getJSONArray("assets").length(); i++) { + currentObject = json.getJSONArray("assets").getJSONObject(i); + + if (!currentObject.getString("image_url").isEmpty()) { + // TODO: Find a way to rip external content. + // ArtStation hosts only image content, everything else (videos, 3D Models, etc) + // is hosted in other websites and displayed through embedded HTML5 players + assetURLs.add(currentObject.getString("image_url")); + } + } + + return assetURLs; + } + + @Override + protected void downloadURL(URL url, int index) { + if (albumURL.getType() == URL_TYPE.USER_PORTFOLIO) { + // Replace not allowed characters with underlines + String folderName = projectName.replaceAll("[\\\\/:*?\"<>|]", "_"); + + // Folder name also can't end with dots or spaces, strip them + folderName = folderName.replaceAll("\\s+$", ""); + folderName = folderName.replaceAll("\\.+$", ""); + + // Downloading multiple projects, separate each one in subfolders + addURLToDownload(url, "", folderName); + } else { + addURLToDownload(url); + } + } + + @Override + public String normalizeUrl(String url) { + // Strip URL parameters + return url.replaceAll("\\?\\w+$", ""); + } + + private static class ParsedURL { + URL_TYPE urlType; + String jsonURL, urlID; + + /** + * Construct a new ParsedURL object. + * + * @param urlType URL_TYPE enum containing the URL type + * @param jsonURL String containing the JSON URL location + * @param urlID String containing the ID of this URL + * + */ + ParsedURL(URL_TYPE urlType, String jsonURL, String urlID) { + this.urlType = urlType; + this.jsonURL = jsonURL; + this.urlID = urlID; + } + + /** + * Get URL Type of this ParsedURL object. + * + * @return URL_TYPE enum containing this object type + * + */ + URL_TYPE getType() { + return this.urlType; + } + + /** + * Get JSON location of this ParsedURL object. + * + * @return String containing the JSON URL + * + */ + String getLocation() { + return this.jsonURL; + } + + /** + * Get ID of this ParsedURL object. + * + * @return For URL_TYPE.SINGLE_PROJECT, returns the project hash. For + * URL_TYPE.USER_PORTFOLIO, returns the account name + */ + String getID() { + return this.urlID; + } + } + + /** + * Parses an ArtStation URL. + * + * @param url URL to an ArtStation user profile + * (https://www.artstation.com/username) or single project + * (https://www.artstation.com/artwork/projectid) + * @return ParsedURL object containing URL type, JSON location and ID (stores + * account name or project hash, depending of the URL type identified) + * + */ + private ParsedURL parseURL(URL url) { + String htmlSource; + ParsedURL parsedURL; + + // Load HTML Source of the specified URL + try { + htmlSource = Http.url(url).get().html(); + } catch (IOException e) { + htmlSource = ""; + } + + // Check if HTML Source of the specified URL references a project + Pattern p = Pattern.compile("'/projects/(\\w+)\\.json'"); + Matcher m = p.matcher(htmlSource); + if (m.find()) { + parsedURL = new ParsedURL(URL_TYPE.SINGLE_PROJECT, + "https://www.artstation.com/projects/" + m.group(1) + ".json", m.group(1)); + return parsedURL; + } + + // Check if HTML Source of the specified URL references a user profile + p = Pattern.compile("'/users/([\\w-]+)/quick\\.json'"); + m = p.matcher(htmlSource); + if (m.find()) { + parsedURL = new ParsedURL(URL_TYPE.USER_PORTFOLIO, + "https://www.artstation.com/users/" + m.group(1) + "/projects.json", m.group(1)); + return parsedURL; + } + + // HTML Source of the specified URL doesn't reference a user profile or project + parsedURL = new ParsedURL(URL_TYPE.UNKNOWN, null, null); + return parsedURL; + } + +}