commit
311b73be39
@ -0,0 +1,258 @@
|
||||
package com.rarchives.ripme.ripper.rippers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.rarchives.ripme.ripper.AbstractJSONRipper;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
|
||||
import org.json.JSONObject;
|
||||
|
||||
public class ArtStationRipper extends AbstractJSONRipper {
|
||||
enum URL_TYPE {
|
||||
SINGLE_PROJECT, USER_PORTFOLIO, UNKNOWN
|
||||
}
|
||||
|
||||
private ParsedURL albumURL;
|
||||
private String projectName;
|
||||
private Integer projectIndex;
|
||||
|
||||
public ArtStationRipper(URL url) throws IOException {
|
||||
super(url);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getDomain() {
|
||||
return "artstation.com";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getHost() {
|
||||
return "ArtStation";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getGID(URL url) throws MalformedURLException {
|
||||
JSONObject groupData;
|
||||
|
||||
// Parse URL and store for later use
|
||||
albumURL = parseURL(url);
|
||||
|
||||
if (albumURL.getType() == URL_TYPE.SINGLE_PROJECT) {
|
||||
// URL points to single project, use project title as GID
|
||||
try {
|
||||
groupData = Http.url(albumURL.getLocation()).getJSON();
|
||||
} catch (IOException e) {
|
||||
throw new MalformedURLException("Couldn't load JSON from " + albumURL.getLocation());
|
||||
}
|
||||
return groupData.getString("title");
|
||||
}
|
||||
|
||||
if (albumURL.getType() == URL_TYPE.USER_PORTFOLIO) {
|
||||
// URL points to user portfolio, use user's full name as GID
|
||||
String userInfoURL = "https://www.artstation.com/users/" + albumURL.getID() + "/quick.json";
|
||||
try {
|
||||
groupData = Http.url(userInfoURL).getJSON();
|
||||
} catch (IOException e) {
|
||||
throw new MalformedURLException("Couldn't load JSON from " + userInfoURL);
|
||||
}
|
||||
return groupData.getString("full_name");
|
||||
}
|
||||
|
||||
// No JSON found in the URL entered, can't rip
|
||||
throw new MalformedURLException(
|
||||
"Expected URL to an ArtStation project or user profile - got " + url + " instead");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected JSONObject getFirstPage() throws IOException {
|
||||
if (albumURL.getType() == URL_TYPE.SINGLE_PROJECT) {
|
||||
// URL points to JSON of a single project, just return it
|
||||
return Http.url(albumURL.getLocation()).getJSON();
|
||||
}
|
||||
|
||||
if (albumURL.getType() == URL_TYPE.USER_PORTFOLIO) {
|
||||
// URL points to JSON of a list of projects, load it to parse individual
|
||||
// projects
|
||||
JSONObject albumContent = Http.url(albumURL.getLocation()).getJSON();
|
||||
|
||||
if (albumContent.getInt("total_count") > 0) {
|
||||
// Get JSON of the first project and return it
|
||||
JSONObject projectInfo = albumContent.getJSONArray("data").getJSONObject(0);
|
||||
ParsedURL projectURL = parseURL(new URL(projectInfo.getString("permalink")));
|
||||
return Http.url(projectURL.getLocation()).getJSON();
|
||||
}
|
||||
}
|
||||
|
||||
throw new IOException("URL specified points to an user with empty portfolio");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected JSONObject getNextPage(JSONObject doc) throws IOException {
|
||||
if (albumURL.getType() == URL_TYPE.USER_PORTFOLIO) {
|
||||
// Initialize the index if it hasn't been initialized already
|
||||
if (projectIndex == null) {
|
||||
projectIndex = 1;
|
||||
}
|
||||
|
||||
JSONObject albumContent = Http.url(albumURL.getLocation()).getJSON();
|
||||
|
||||
if (albumContent.getInt("total_count") > projectIndex) {
|
||||
// Get JSON of the next project and return it
|
||||
JSONObject projectInfo = albumContent.getJSONArray("data").getJSONObject(projectIndex);
|
||||
ParsedURL projectURL = parseURL(new URL(projectInfo.getString("permalink")));
|
||||
projectIndex++;
|
||||
return Http.url(projectURL.getLocation()).getJSON();
|
||||
}
|
||||
|
||||
throw new IOException("No more projects");
|
||||
}
|
||||
|
||||
throw new IOException("Downloading a single project");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<String> getURLsFromJSON(JSONObject json) {
|
||||
List<String> assetURLs = new ArrayList<>();
|
||||
JSONObject currentObject;
|
||||
|
||||
// Update project name variable from JSON data. Used by downloadURL() to create
|
||||
// subfolders when input URL is URL_TYPE.USER_PORTFOLIO
|
||||
projectName = json.getString("title");
|
||||
|
||||
for (int i = 0; i < json.getJSONArray("assets").length(); i++) {
|
||||
currentObject = json.getJSONArray("assets").getJSONObject(i);
|
||||
|
||||
if (!currentObject.getString("image_url").isEmpty()) {
|
||||
// TODO: Find a way to rip external content.
|
||||
// ArtStation hosts only image content, everything else (videos, 3D Models, etc)
|
||||
// is hosted in other websites and displayed through embedded HTML5 players
|
||||
assetURLs.add(currentObject.getString("image_url"));
|
||||
}
|
||||
}
|
||||
|
||||
return assetURLs;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void downloadURL(URL url, int index) {
|
||||
if (albumURL.getType() == URL_TYPE.USER_PORTFOLIO) {
|
||||
// Replace not allowed characters with underlines
|
||||
String folderName = projectName.replaceAll("[\\\\/:*?\"<>|]", "_");
|
||||
|
||||
// Folder name also can't end with dots or spaces, strip them
|
||||
folderName = folderName.replaceAll("\\s+$", "");
|
||||
folderName = folderName.replaceAll("\\.+$", "");
|
||||
|
||||
// Downloading multiple projects, separate each one in subfolders
|
||||
addURLToDownload(url, "", folderName);
|
||||
} else {
|
||||
addURLToDownload(url);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String normalizeUrl(String url) {
|
||||
// Strip URL parameters
|
||||
return url.replaceAll("\\?\\w+$", "");
|
||||
}
|
||||
|
||||
private static class ParsedURL {
|
||||
URL_TYPE urlType;
|
||||
String jsonURL, urlID;
|
||||
|
||||
/**
|
||||
* Construct a new ParsedURL object.
|
||||
*
|
||||
* @param urlType URL_TYPE enum containing the URL type
|
||||
* @param jsonURL String containing the JSON URL location
|
||||
* @param urlID String containing the ID of this URL
|
||||
*
|
||||
*/
|
||||
ParsedURL(URL_TYPE urlType, String jsonURL, String urlID) {
|
||||
this.urlType = urlType;
|
||||
this.jsonURL = jsonURL;
|
||||
this.urlID = urlID;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get URL Type of this ParsedURL object.
|
||||
*
|
||||
* @return URL_TYPE enum containing this object type
|
||||
*
|
||||
*/
|
||||
URL_TYPE getType() {
|
||||
return this.urlType;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get JSON location of this ParsedURL object.
|
||||
*
|
||||
* @return String containing the JSON URL
|
||||
*
|
||||
*/
|
||||
String getLocation() {
|
||||
return this.jsonURL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get ID of this ParsedURL object.
|
||||
*
|
||||
* @return For URL_TYPE.SINGLE_PROJECT, returns the project hash. For
|
||||
* URL_TYPE.USER_PORTFOLIO, returns the account name
|
||||
*/
|
||||
String getID() {
|
||||
return this.urlID;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses an ArtStation URL.
|
||||
*
|
||||
* @param url URL to an ArtStation user profile
|
||||
* (https://www.artstation.com/username) or single project
|
||||
* (https://www.artstation.com/artwork/projectid)
|
||||
* @return ParsedURL object containing URL type, JSON location and ID (stores
|
||||
* account name or project hash, depending of the URL type identified)
|
||||
*
|
||||
*/
|
||||
private ParsedURL parseURL(URL url) {
|
||||
String htmlSource;
|
||||
ParsedURL parsedURL;
|
||||
|
||||
// Load HTML Source of the specified URL
|
||||
try {
|
||||
htmlSource = Http.url(url).get().html();
|
||||
} catch (IOException e) {
|
||||
htmlSource = "";
|
||||
}
|
||||
|
||||
// Check if HTML Source of the specified URL references a project
|
||||
Pattern p = Pattern.compile("'/projects/(\\w+)\\.json'");
|
||||
Matcher m = p.matcher(htmlSource);
|
||||
if (m.find()) {
|
||||
parsedURL = new ParsedURL(URL_TYPE.SINGLE_PROJECT,
|
||||
"https://www.artstation.com/projects/" + m.group(1) + ".json", m.group(1));
|
||||
return parsedURL;
|
||||
}
|
||||
|
||||
// Check if HTML Source of the specified URL references a user profile
|
||||
p = Pattern.compile("'/users/([\\w-]+)/quick\\.json'");
|
||||
m = p.matcher(htmlSource);
|
||||
if (m.find()) {
|
||||
parsedURL = new ParsedURL(URL_TYPE.USER_PORTFOLIO,
|
||||
"https://www.artstation.com/users/" + m.group(1) + "/projects.json", m.group(1));
|
||||
return parsedURL;
|
||||
}
|
||||
|
||||
// HTML Source of the specified URL doesn't reference a user profile or project
|
||||
parsedURL = new ParsedURL(URL_TYPE.UNKNOWN, null, null);
|
||||
return parsedURL;
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,32 @@
|
||||
package com.rarchives.ripme.tst.ripper.rippers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.rarchives.ripme.ripper.rippers.ArtStationRipper;
|
||||
|
||||
public class ArtStationRipperTest extends RippersTest {
|
||||
|
||||
public void testArtStationProjects() throws IOException {
|
||||
List<URL> contentURLs = new ArrayList<>();
|
||||
contentURLs.add(new URL("https://www.artstation.com/artwork/the-dwarf-mortar"));
|
||||
contentURLs.add(new URL("https://www.artstation.com/artwork/K36GR"));
|
||||
contentURLs.add(new URL("http://artstation.com/artwork/5JJQw"));
|
||||
for (URL url : contentURLs) {
|
||||
ArtStationRipper ripper = new ArtStationRipper(url);
|
||||
testRipper(ripper);
|
||||
}
|
||||
}
|
||||
|
||||
public void testArtStationUserProfiles() throws IOException {
|
||||
List<URL> contentURLs = new ArrayList<>();
|
||||
contentURLs.add(new URL("https://www.artstation.com/heitoramatsu"));
|
||||
contentURLs.add(new URL("http://artstation.com/givemeapiggy"));
|
||||
for (URL url : contentURLs) {
|
||||
ArtStationRipper ripper = new ArtStationRipper(url);
|
||||
testRipper(ripper);
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user