/*
 * Reconstructed from a whitespace-collapsed git patch:
 *   Commit:  c7fce6a420b2d7f46c4e8d7a94c8a873bcfcf390
 *   From:    4pr0n
 *   Date:    Sat, 28 Jun 2014 00:53:17 -0700
 *   Subject: [PATCH] 500px ripper, uses API, doesn't get full-size image
 *   File:    src/main/java/com/rarchives/ripme/ripper/rippers/FivehundredpxRipper.java
 */
package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.json.JSONArray;
import org.json.JSONObject;

import com.rarchives.ripme.ripper.AbstractJSONRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;

/**
 * Ripper for 500px.com that talks to the 500px JSON API.
 *
 * <p>API documentation: https://github.com/500px/api-documentation
 *
 * <p>Supported page URLs:
 * <pre>
 * http://500px.com/tsyganov/stories/80675/galya   (a single story, "blog")
 * http://500px.com/tsyganov/stories               (all stories, "blogs")
 * http://500px.com/tsyganov/favorites             (a user's favorites)
 * http://500px.com/tsyganov                       (a user's photos)
 * </pre>
 *
 * <p>Example of an API request as noted by the original author:
 * <pre>
 * https://api.500px.com/v1/photo
 *    ?rpp=100
 *    &amp;feature=user
 *    &amp;image_size=3
 *    &amp;page=3
 *    &amp;sort=created_at
 *    &amp;include_states=false
 *    &amp;user_id=1913159
 *    &amp;consumer_key=XPm2br2zGBq6TOfd2xbDIHYoLnt3cLxr1HYryGCv
 * </pre>
 *
 * <p>Per the originating commit message, the images fetched this way are not
 * the full-size originals.
 */
public class FivehundredpxRipper extends AbstractJSONRipper {

    /** Root of the 500px REST API; every request URL is built from this. */
    private static final String API_BASE = "https://api.500px.com/v1";
    private static final String CONSUMER_KEY = "XPm2br2zGBq6TOfd2xbDIHYoLnt3cLxr1HYryGCv";

    // Page-URL shapes, compiled once. They are tested in this order because the
    // single-story pattern is the most specific and the plain-user pattern the least.
    private static final Pattern STORY_URL =
            Pattern.compile("^.*500px\\.com/([a-zA-Z0-9\\-_]+)/stories/([0-9]+).*$");
    private static final Pattern STORIES_URL =
            Pattern.compile("^.*500px\\.com/([a-zA-Z0-9\\-_]+)/stories/?$");
    private static final Pattern FAVORITES_URL =
            Pattern.compile("^.*500px\\.com/([a-zA-Z0-9\\-_]+)/favorites/?$");
    private static final Pattern USER_URL =
            Pattern.compile("^.*500px\\.com/([a-zA-Z0-9\\-_]+)/?$");

    /** Page number of the most recently requested API page (the first request carries no page parameter). */
    private int page = 1;

    /**
     * API request URL (without page and consumer key) for the album being
     * ripped. Set by {@link #getGID(URL)}; until then it is just the API root.
     */
    private String baseURL = API_BASE;

    public FivehundredpxRipper(URL url) throws IOException {
        super(url);
    }

    /** @return the short host name of this ripper, {@code "500px"}. */
    @Override
    public String getHost() {
        return "500px";
    }

    /** @return the domain this ripper handles, {@code "500px.com"}. */
    @Override
    public String getDomain() {
        return "500px.com";
    }

    /**
     * Derives the album id from a 500px page URL and, as a side effect,
     * records the matching API request URL in {@code baseURL}.
     *
     * <p>The API URL is assigned rather than appended, so calling this method
     * more than once leaves {@code baseURL} well-formed.
     *
     * @param url a 500px story, stories, favorites or user page URL
     * @return {@code <user>_stories_<id>}, {@code <user>_stories},
     *         {@code <user>_faves} or {@code <user>} respectively
     * @throws MalformedURLException if the URL matches none of the supported shapes
     */
    @Override
    public String getGID(URL url) throws MalformedURLException {
        String link = url.toExternalForm();
        Matcher m;

        // http://500px.com/tsyganov/stories/80675/galya ("blog")
        m = STORY_URL.matcher(link);
        if (m.matches()) {
            String username = m.group(1);
            String blogid = m.group(2);
            baseURL = API_BASE + "/blogs/" + blogid
                    + "?feature=user"
                    + "&username=" + username
                    + "&rpp=100";
            return username + "_stories_" + blogid;
        }

        // http://500px.com/tsyganov/stories ("blogs")
        m = STORIES_URL.matcher(link);
        if (m.matches()) {
            String username = m.group(1);
            baseURL = API_BASE + "/blogs"
                    + "?feature=user"
                    + "&username=" + username
                    + "&rpp=100";
            return username + "_stories";
        }

        // http://500px.com/tsyganov/favorites
        m = FAVORITES_URL.matcher(link);
        if (m.matches()) {
            String username = m.group(1);
            baseURL = API_BASE + "/photos"
                    + "?feature=user_favorites"
                    + "&username=" + username
                    + "&rpp=100"
                    + "&image_size=4";
            return username + "_faves";
        }

        // http://500px.com/tsyganov (photos)
        m = USER_URL.matcher(link);
        if (m.matches()) {
            String username = m.group(1);
            baseURL = API_BASE + "/photos"
                    + "?feature=user"
                    + "&username=" + username
                    + "&rpp=100"
                    + "&image_size=4";
            // Distinct from the favorites id so the two albums do not collide.
            return username;
        }

        throw new MalformedURLException(
                "Expected 500px.com gallery formats: "
                + "/stories/### /stories /favorites /"
                + " Got: " + url);
    }

    /**
     * Fetches the first page of results for the URL configured by
     * {@link #getGID(URL)}.
     *
     * <p>For a story list, each story is fetched in turn (500 ms apart) and
     * all of their photos are merged into one synthetic object that has only
     * a {@code "photos"} array. Because that object has no pagination fields,
     * {@link #getNextPage(JSONObject)} stops after it.
     *
     * @return the API response, or the merged {@code {"photos": [...]}} object for a story list
     * @throws IOException if a request fails
     */
    @Override
    public JSONObject getFirstPage() throws IOException {
        URL apiURL = new URL(baseURL + "&consumer_key=" + CONSUMER_KEY);
        JSONObject json = Http.url(apiURL).getJSON();
        if (!baseURL.contains("/blogs?")) {
            return json;
        }

        // List of stories: flatten the photos of every story into one array.
        JSONArray allPhotos = new JSONArray();
        JSONObject result = new JSONObject();
        result.put("photos", allPhotos);
        JSONArray jsonBlogs = json.getJSONArray("blog_posts");
        for (int i = 0; i < jsonBlogs.length(); i++) {
            if (i > 0) {
                sleep(500); // pause between consecutive story requests
            }
            JSONObject jsonBlog = jsonBlogs.getJSONObject(i);
            int blogid = jsonBlog.getInt("id");
            String username = jsonBlog.getJSONObject("user").getString("username");
            String blogURL = API_BASE + "/blogs/" + blogid
                    + "?feature=user"
                    + "&username=" + username
                    + "&rpp=100"
                    + "&consumer_key=" + CONSUMER_KEY;
            logger.info("Loading " + blogURL);
            sendUpdate(STATUS.LOADING_RESOURCE, "Story ID " + blogid + " for user " + username);
            JSONArray storyPhotos = Http.url(blogURL).getJSON().getJSONArray("photos");
            for (int j = 0; j < storyPhotos.length(); j++) {
                allPhotos.put(storyPhotos.getJSONObject(j));
            }
        }
        return result;
    }

    /**
     * Fetches the page after the one described by {@code json}.
     *
     * @param json the previously returned page
     * @return the next page of API results
     * @throws IOException if {@code json} has no pagination fields, if it was
     *         the last page, or if the request fails
     */
    @Override
    public JSONObject getNextPage(JSONObject json) throws IOException {
        // Check previous JSON to see if we hit the last page
        if (!json.has("current_page") || !json.has("total_pages")) {
            throw new IOException("No more pages");
        }
        int currentPage = json.getInt("current_page");
        int totalPages = json.getInt("total_pages");
        // ">=" rather than "==": an empty result (total_pages of 0) or an
        // overshoot must also terminate instead of requesting pages forever.
        if (currentPage >= totalPages) {
            throw new IOException("No more results");
        }

        sleep(500);
        ++page;
        URL apiURL = new URL(baseURL
                + "&page=" + page
                + "&consumer_key=" + CONSUMER_KEY);
        return Http.url(apiURL).getJSON();
    }

    /**
     * Extracts the image URL of every entry in the page's {@code "photos"} array.
     *
     * @param json a page object containing a {@code "photos"} array
     * @return the {@code "image_url"} value of each photo, in array order
     */
    @Override
    public List<String> getURLsFromJSON(JSONObject json) {
        JSONArray photos = json.getJSONArray("photos");
        List<String> imageURLs = new ArrayList<String>(photos.length());
        for (int i = 0; i < photos.length(); i++) {
            imageURLs.add(photos.getJSONObject(i).getString("image_url"));
        }
        return imageURLs;
    }

    /**
     * Queues one image for download. The saved-file prefix is the index prefix
     * followed by the second-to-last path segment of the URL and a dash.
     *
     * @param url   the image URL
     * @param index position of the image within the album
     */
    @Override
    public void downloadURL(URL url, int index) {
        String u = url.toExternalForm();
        String[] fields = u.split("/");
        // NOTE(review): the second-to-last segment is presumably the photo id; confirm against real image URLs.
        String prefix = getPrefix(index) + fields[fields.length - 2] + "-";
        addURLToDownload(url, prefix);
    }

}