2014-06-28 09:53:17 +02:00
|
|
|
package com.rarchives.ripme.ripper.rippers;
|
|
|
|
|
2015-02-10 08:29:29 +01:00
|
|
|
import java.io.File;
|
2014-06-28 09:53:17 +02:00
|
|
|
import java.io.IOException;
|
2014-06-28 10:25:03 +02:00
|
|
|
import java.net.HttpURLConnection;
|
2014-06-28 09:53:17 +02:00
|
|
|
import java.net.MalformedURLException;
|
|
|
|
import java.net.URL;
|
|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.List;
|
|
|
|
import java.util.regex.Matcher;
|
|
|
|
import java.util.regex.Pattern;
|
|
|
|
|
|
|
|
import org.json.JSONArray;
|
|
|
|
import org.json.JSONObject;
|
2015-12-22 16:47:58 +01:00
|
|
|
import org.jsoup.nodes.Document;
|
|
|
|
import org.jsoup.select.Elements;
|
2014-06-28 09:53:17 +02:00
|
|
|
|
|
|
|
import com.rarchives.ripme.ripper.AbstractJSONRipper;
|
|
|
|
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
|
|
|
import com.rarchives.ripme.utils.Http;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* https://github.com/500px/api-documentation
|
|
|
|
* http://500px.com/tsyganov/stories/80675/galya ("blog")
|
|
|
|
* http://500px.com/tsyganov/stories ("blogs") - get HTML, parse stories
|
|
|
|
* http://500px.com/tsyganov/favorites
|
|
|
|
* http://500px.com/tsyganov (photos)
|
|
|
|
* https://api.500px.com/v1/photo
|
|
|
|
* ?rpp=100
|
|
|
|
* &feature=user
|
|
|
|
* &image_size=3
|
|
|
|
* &page=3
|
|
|
|
* &sort=created_at
|
|
|
|
* &include_states=false
|
|
|
|
* &user_id=1913159
|
|
|
|
* &consumer_key=XPm2br2zGBq6TOfd2xbDIHYoLnt3cLxr1HYryGCv
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
public class FivehundredpxRipper extends AbstractJSONRipper {
|
|
|
|
|
|
|
|
private int page = 1;
|
|
|
|
private String baseURL = "https://api.500px.com/v1";
|
|
|
|
private static final String CONSUMER_KEY = "XPm2br2zGBq6TOfd2xbDIHYoLnt3cLxr1HYryGCv";
|
|
|
|
|
|
|
|
public FivehundredpxRipper(URL url) throws IOException {
|
|
|
|
super(url);
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public String getHost() {
|
|
|
|
return "500px";
|
|
|
|
}
|
|
|
|
@Override
|
|
|
|
public String getDomain() {
|
|
|
|
return "500px.com";
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public String getGID(URL url) throws MalformedURLException {
|
|
|
|
Pattern p; Matcher m;
|
|
|
|
|
|
|
|
// http://500px.com/tsyganov/stories/80675/galya ("blog")
|
|
|
|
p = Pattern.compile("^.*500px.com/([a-zA-Z0-9\\-_]+)/stories/([0-9]+).*$");
|
|
|
|
m = p.matcher(url.toExternalForm());
|
|
|
|
if (m.matches()) {
|
|
|
|
String username = m.group(1),
|
|
|
|
blogid = m.group(2);
|
|
|
|
baseURL += "/blogs/" + blogid
|
|
|
|
+ "?feature=user"
|
|
|
|
+ "&username=" + username
|
2014-06-28 10:25:03 +02:00
|
|
|
+ "&image_size=5"
|
2014-06-28 09:53:17 +02:00
|
|
|
+ "&rpp=100";
|
|
|
|
return username + "_stories_" + blogid;
|
|
|
|
}
|
|
|
|
|
|
|
|
// http://500px.com/tsyganov/stories ("blogs")
|
|
|
|
p = Pattern.compile("^.*500px.com/([a-zA-Z0-9\\-_]+)/stories/?$");
|
|
|
|
m = p.matcher(url.toExternalForm());
|
|
|
|
if (m.matches()) {
|
|
|
|
String username = m.group(1);
|
|
|
|
baseURL += "/blogs"
|
|
|
|
+ "?feature=user"
|
|
|
|
+ "&username=" + username
|
|
|
|
+ "&rpp=100";
|
|
|
|
return username + "_stories";
|
|
|
|
}
|
|
|
|
|
|
|
|
// http://500px.com/tsyganov/favorites
|
|
|
|
p = Pattern.compile("^.*500px.com/([a-zA-Z0-9\\-_]+)/favorites/?$");
|
|
|
|
m = p.matcher(url.toExternalForm());
|
|
|
|
if (m.matches()) {
|
|
|
|
String username = m.group(1);
|
|
|
|
baseURL += "/photos"
|
|
|
|
+ "?feature=user_favorites"
|
|
|
|
+ "&username=" + username
|
|
|
|
+ "&rpp=100"
|
2014-06-28 10:25:03 +02:00
|
|
|
+ "&image_size=5";
|
2014-06-28 09:53:17 +02:00
|
|
|
return username + "_faves";
|
|
|
|
}
|
|
|
|
|
2016-04-17 14:44:25 +02:00
|
|
|
// http://500px.com/tsyganov/galleries
|
|
|
|
p = Pattern.compile("^.*500px.com/([a-zA-Z0-9\\-_]+)/galleries/?$");
|
|
|
|
m = p.matcher(url.toExternalForm());
|
|
|
|
if (m.matches()) {
|
|
|
|
String username = m.group(1);
|
|
|
|
String userID;
|
|
|
|
try {
|
|
|
|
userID = getUserID(username);
|
|
|
|
} catch (IOException e) {
|
|
|
|
throw new MalformedURLException("Unable to get User ID from username (" + username + ")");
|
|
|
|
}
|
|
|
|
baseURL += "/users/" + userID + "/galleries"
|
|
|
|
+ "?rpp=100";
|
|
|
|
return username + "_galleries";
|
|
|
|
}
|
|
|
|
|
|
|
|
// https://500px.com/getesmart86/galleries/olga
|
|
|
|
p = Pattern.compile("^.*500px.com/([a-zA-Z0-9\\-_]+)/galleries/([a-zA-Z0-9\\-_]+)/?$");
|
|
|
|
m = p.matcher(url.toExternalForm());
|
|
|
|
if (m.matches()) {
|
|
|
|
String username = m.group(1);
|
|
|
|
String subgallery = m.group(2);
|
|
|
|
String userID;
|
|
|
|
try {
|
|
|
|
userID = getUserID(username);
|
|
|
|
} catch (IOException e) {
|
|
|
|
throw new MalformedURLException("Unable to get User ID from username (" + username + ")");
|
|
|
|
}
|
|
|
|
baseURL += "/users/" + userID + "/galleries/" + subgallery + "/items"
|
|
|
|
+ "?rpp=100"
|
|
|
|
+ "&image_size=5";
|
|
|
|
return username + "_galleries_" + subgallery;
|
|
|
|
}
|
|
|
|
|
2014-06-28 09:53:17 +02:00
|
|
|
// http://500px.com/tsyganov (photos)
|
|
|
|
p = Pattern.compile("^.*500px.com/([a-zA-Z0-9\\-_]+)/?$");
|
|
|
|
m = p.matcher(url.toExternalForm());
|
|
|
|
if (m.matches()) {
|
|
|
|
String username = m.group(1);
|
|
|
|
baseURL += "/photos"
|
|
|
|
+ "?feature=user"
|
|
|
|
+ "&username=" + username
|
|
|
|
+ "&rpp=100"
|
2014-06-28 10:25:03 +02:00
|
|
|
+ "&image_size=5";
|
|
|
|
return username;
|
2014-06-28 09:53:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
throw new MalformedURLException(
|
|
|
|
"Expected 500px.com gallery formats: "
|
|
|
|
+ "/stories/### /stories /favorites /"
|
|
|
|
+ " Got: " + url);
|
|
|
|
}
|
|
|
|
|
2016-04-17 14:44:25 +02:00
|
|
|
/** Convert username to UserID. */
|
|
|
|
private String getUserID(String username) throws IOException {
|
|
|
|
logger.info("Fetching user ID for " + username);
|
|
|
|
JSONObject json = new Http("https://api.500px.com/v1/" +
|
2017-05-10 00:22:55 +02:00
|
|
|
"users/show" +
|
2016-04-17 14:44:25 +02:00
|
|
|
"?username=" + username +
|
|
|
|
"&consumer_key=" + CONSUMER_KEY)
|
|
|
|
.getJSON();
|
|
|
|
return Long.toString(json.getJSONObject("user").getLong("id"));
|
|
|
|
}
|
|
|
|
|
2014-06-28 09:53:17 +02:00
|
|
|
@Override
|
|
|
|
public JSONObject getFirstPage() throws IOException {
|
|
|
|
URL apiURL = new URL(baseURL + "&consumer_key=" + CONSUMER_KEY);
|
2015-02-10 08:29:29 +01:00
|
|
|
logger.debug("apiURL: " + apiURL);
|
2014-06-28 09:53:17 +02:00
|
|
|
JSONObject json = Http.url(apiURL).getJSON();
|
2016-04-17 14:44:25 +02:00
|
|
|
|
|
|
|
if (baseURL.contains("/galleries?")) {
|
|
|
|
// We're in the root /galleries folder, need to get all images from all galleries.
|
|
|
|
JSONObject result = new JSONObject();
|
|
|
|
result.put("photos", new JSONArray());
|
|
|
|
// Iterate over every gallery
|
|
|
|
JSONArray jsonGalleries = json.getJSONArray("galleries");
|
|
|
|
for (int i = 0; i < jsonGalleries.length(); i++) {
|
|
|
|
if (i > 0) {
|
|
|
|
sleep(500);
|
|
|
|
}
|
|
|
|
JSONObject jsonGallery = jsonGalleries.getJSONObject(i);
|
|
|
|
long galleryID = jsonGallery.getLong("id");
|
|
|
|
String userID = Long.toString(jsonGallery.getLong("user_id"));
|
|
|
|
String blogURL = "https://api.500px.com/v1/users/" + userID + "/galleries/" + galleryID + "/items"
|
|
|
|
+ "?rpp=100"
|
|
|
|
+ "&image_size=5"
|
|
|
|
+ "&consumer_key=" + CONSUMER_KEY;
|
|
|
|
logger.info("Loading " + blogURL);
|
|
|
|
sendUpdate(STATUS.LOADING_RESOURCE, "Gallery ID " + galleryID + " for userID " + userID);
|
|
|
|
JSONObject thisJSON = Http.url(blogURL).getJSON();
|
|
|
|
JSONArray thisPhotos = thisJSON.getJSONArray("photos");
|
|
|
|
// Iterate over every image in this story
|
|
|
|
for (int j = 0; j < thisPhotos.length(); j++) {
|
|
|
|
result.getJSONArray("photos").put(thisPhotos.getJSONObject(j));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
else if (baseURL.contains("/blogs?")) {
|
2015-02-10 08:29:29 +01:00
|
|
|
// List of stories to return
|
2014-06-28 09:53:17 +02:00
|
|
|
JSONObject result = new JSONObject();
|
|
|
|
result.put("photos", new JSONArray());
|
2015-02-10 08:29:29 +01:00
|
|
|
|
2014-06-28 09:53:17 +02:00
|
|
|
// Iterate over every story
|
2015-02-10 08:29:29 +01:00
|
|
|
JSONArray jsonBlogs = json.getJSONArray("blog_posts");
|
2014-06-28 09:53:17 +02:00
|
|
|
for (int i = 0; i < jsonBlogs.length(); i++) {
|
|
|
|
if (i > 0) {
|
|
|
|
sleep(500);
|
|
|
|
}
|
|
|
|
JSONObject jsonBlog = jsonBlogs.getJSONObject(i);
|
|
|
|
int blogid = jsonBlog.getInt("id");
|
|
|
|
String username = jsonBlog.getJSONObject("user").getString("username");
|
|
|
|
String blogURL = "https://api.500px.com/v1/blogs/" + blogid
|
|
|
|
+ "?feature=user"
|
|
|
|
+ "&username=" + username
|
|
|
|
+ "&rpp=100"
|
2014-06-28 10:25:03 +02:00
|
|
|
+ "&image_size=5"
|
2014-06-28 09:53:17 +02:00
|
|
|
+ "&consumer_key=" + CONSUMER_KEY;
|
|
|
|
logger.info("Loading " + blogURL);
|
|
|
|
sendUpdate(STATUS.LOADING_RESOURCE, "Story ID " + blogid + " for user " + username);
|
|
|
|
JSONObject thisJSON = Http.url(blogURL).getJSON();
|
|
|
|
JSONArray thisPhotos = thisJSON.getJSONArray("photos");
|
|
|
|
// Iterate over every image in this story
|
|
|
|
for (int j = 0; j < thisPhotos.length(); j++) {
|
|
|
|
result.getJSONArray("photos").put(thisPhotos.getJSONObject(j));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
return json;
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public JSONObject getNextPage(JSONObject json) throws IOException {
|
2015-02-10 08:29:29 +01:00
|
|
|
if (isThisATest()) {
|
|
|
|
return null;
|
|
|
|
}
|
2014-06-28 09:53:17 +02:00
|
|
|
// Check previous JSON to see if we hit the last page
|
|
|
|
if (!json.has("current_page")
|
|
|
|
|| !json.has("total_pages")) {
|
|
|
|
throw new IOException("No more pages");
|
|
|
|
}
|
|
|
|
int currentPage = json.getInt("current_page"),
|
|
|
|
totalPages = json.getInt("total_pages");
|
|
|
|
if (currentPage == totalPages) {
|
|
|
|
throw new IOException("No more results");
|
|
|
|
}
|
|
|
|
|
|
|
|
sleep(500);
|
|
|
|
++page;
|
|
|
|
URL apiURL = new URL(baseURL
|
|
|
|
+ "&page=" + page
|
|
|
|
+ "&consumer_key=" + CONSUMER_KEY);
|
|
|
|
return Http.url(apiURL).getJSON();
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public List<String> getURLsFromJSON(JSONObject json) {
|
|
|
|
List<String> imageURLs = new ArrayList<String>();
|
|
|
|
JSONArray photos = json.getJSONArray("photos");
|
|
|
|
for (int i = 0; i < photos.length(); i++) {
|
2015-12-23 06:10:30 +01:00
|
|
|
if (super.isStopped()) {
|
|
|
|
break;
|
|
|
|
}
|
2014-06-28 10:25:03 +02:00
|
|
|
JSONObject photo = photos.getJSONObject(i);
|
2015-12-22 16:47:58 +01:00
|
|
|
String imageURL = null;
|
|
|
|
String rawUrl = "https://500px.com" + photo.getString("url");
|
|
|
|
Document doc;
|
2015-12-23 06:10:30 +01:00
|
|
|
Elements images = new Elements();
|
2015-12-22 16:47:58 +01:00
|
|
|
try {
|
|
|
|
logger.debug("Loading " + rawUrl);
|
|
|
|
super.retrievingSource(rawUrl);
|
|
|
|
doc = Http.url(rawUrl).get();
|
2015-12-23 06:10:30 +01:00
|
|
|
images = doc.select("div#preload img");
|
2015-12-22 16:47:58 +01:00
|
|
|
}
|
|
|
|
catch (IOException e) {
|
|
|
|
logger.error("Error fetching full-size image from " + rawUrl, e);
|
|
|
|
}
|
2015-12-23 06:10:30 +01:00
|
|
|
if (images.size() > 0) {
|
|
|
|
imageURL = images.first().attr("src");
|
|
|
|
logger.debug("Found full-size non-watermarked image: " + imageURL);
|
2015-12-22 16:47:58 +01:00
|
|
|
}
|
|
|
|
else {
|
2015-12-23 06:10:30 +01:00
|
|
|
logger.debug("Falling back to image_url from API response");
|
2015-12-22 16:47:58 +01:00
|
|
|
imageURL = photo.getString("image_url");
|
|
|
|
imageURL = imageURL.replaceAll("/4\\.", "/5.");
|
|
|
|
// See if there's larger images
|
|
|
|
for (String imageSize : new String[] { "2048" } ) {
|
|
|
|
String fsURL = imageURL.replaceAll("/5\\.", "/" + imageSize + ".");
|
|
|
|
sleep(10);
|
|
|
|
if (urlExists(fsURL)) {
|
|
|
|
logger.info("Found larger image at " + fsURL);
|
|
|
|
imageURL = fsURL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (imageURL == null) {
|
|
|
|
logger.error("Failed to find image for photo " + photo.toString());
|
2014-06-28 10:25:03 +02:00
|
|
|
}
|
2015-12-22 16:47:58 +01:00
|
|
|
else {
|
|
|
|
imageURLs.add(imageURL);
|
|
|
|
if (isThisATest()) {
|
|
|
|
break;
|
|
|
|
}
|
2015-02-10 08:29:29 +01:00
|
|
|
}
|
2014-06-28 09:53:17 +02:00
|
|
|
}
|
|
|
|
return imageURLs;
|
|
|
|
}
|
2017-05-10 00:22:55 +02:00
|
|
|
|
2014-06-28 10:25:03 +02:00
|
|
|
private boolean urlExists(String url) {
|
|
|
|
try {
|
|
|
|
HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection();
|
|
|
|
connection.setRequestMethod("HEAD");
|
|
|
|
if (connection.getResponseCode() != 200) {
|
|
|
|
throw new IOException("Couldn't find full-size image at " + url);
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
} catch (IOException e) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
2014-06-28 09:53:17 +02:00
|
|
|
|
2015-02-10 08:29:29 +01:00
|
|
|
@Override
|
|
|
|
public boolean keepSortOrder() {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2014-06-28 09:53:17 +02:00
|
|
|
@Override
|
|
|
|
public void downloadURL(URL url, int index) {
|
|
|
|
String u = url.toExternalForm();
|
|
|
|
String[] fields = u.split("/");
|
2015-02-10 08:29:29 +01:00
|
|
|
String prefix = getPrefix(index) + fields[fields.length - 3];
|
|
|
|
File saveAs = new File(getWorkingDir() + File.separator + prefix + ".jpg");
|
|
|
|
addURLToDownload(url, saveAs, "", null);
|
2014-06-28 09:53:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|