From 6ca4ebd1766bd923d4951c2ea3f9dd2c88b8cfb8 Mon Sep 17 00:00:00 2001
From: torbica
Date: Tue, 9 May 2017 23:42:14 +0200
Subject: [PATCH] Twitter: added support for video and multiple-image tweets (#478)

---
 .../ripme/ripper/rippers/TwitterRipper.java   | 201 +++++++++---------
 1 file changed, 100 insertions(+), 101 deletions(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/TwitterRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/TwitterRipper.java
index 45967f3a..fa90d4a8 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/TwitterRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/TwitterRipper.java
@@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils;
 public class TwitterRipper extends AlbumRipper {
 
     private static final String DOMAIN = "twitter.com",
-                                HOST = "twitter";
+            HOST = "twitter";
 
     private static final int MAX_REQUESTS = Utils.getConfigInteger("twitter.max_requests", 10);
     private static final int WAIT_TIME = 2000;
@@ -34,9 +34,10 @@
         ACCOUNT,
         SEARCH
     }
+
     private ALBUM_TYPE albumType;
     private String searchText, accountName;
-    
+
     public TwitterRipper(URL url) throws IOException {
         super(url);
         authKey = Utils.getConfigString("twitter.auth", null);
@@ -69,15 +70,15 @@
         }
         throw new MalformedURLException("Expected username or search string in url: " + url);
     }
-    
+
     private void getAccessToken() throws IOException {
         Document doc = Http.url("https://api.twitter.com/oauth2/token")
-                           .ignoreContentType()
-                           .header("Authorization", "Basic " + authKey)
-                           .header("Content-Type", "application/x-www-form-urlencoded;charset=UTF-8")
-                           .header("User-agent", "ripe and zipe")
-                           .data("grant_type", "client_credentials")
-                           .post();
+                .ignoreContentType()
+                .header("Authorization", "Basic " + authKey)
+                .header("Content-Type", "application/x-www-form-urlencoded;charset=UTF-8")
+                .header("User-agent", "ripe and zipe")
+                .data("grant_type", "client_credentials")
+                .post();
         String body = doc.body().html().replaceAll("&quot;", "\"");
         try {
             JSONObject json = new JSONObject(body);
@@ -88,20 +89,20 @@
             throw new IOException("Failure while parsing JSON: " + body, e);
         }
     }
-    
+
     private void checkRateLimits(String resource, String api) throws IOException {
         Document doc = Http.url("https://api.twitter.com/1.1/application/rate_limit_status.json?resources=" + resource)
-                           .ignoreContentType()
-                           .header("Authorization", "Bearer " + accessToken)
-                           .header("Content-Type", "application/x-www-form-urlencoded;charset=UTF-8")
-                           .header("User-agent", "ripe and zipe")
-                           .get();
+                .ignoreContentType()
+                .header("Authorization", "Bearer " + accessToken)
+                .header("Content-Type", "application/x-www-form-urlencoded;charset=UTF-8")
+                .header("User-agent", "ripe and zipe")
+                .get();
         String body = doc.body().html().replaceAll("&quot;", "\"");
         try {
             JSONObject json = new JSONObject(body);
             JSONObject stats = json.getJSONObject("resources")
-                                   .getJSONObject(resource)
-                                   .getJSONObject(api);
+                    .getJSONObject(resource)
+                    .getJSONObject(api);
             int remaining = stats.getInt("remaining");
             logger.info(" Twitter " + resource + " calls remaining: " + remaining);
             if (remaining < 20) {
@@ -113,42 +114,42 @@
             throw new IOException("Error while parsing JSON: " + body, e);
         }
     }
-    
+
     private String getApiURL(Long maxID) {
         StringBuilder req = new StringBuilder();
         switch (albumType) {
-            case ACCOUNT:
-                req.append("https://api.twitter.com/1.1/statuses/user_timeline.json")
-                   .append("?screen_name=" + this.accountName)
-                   .append("&include_entities=true")
-                   .append("&exclude_replies=true")
-                   .append("&trim_user=true")
-                   .append("&include_rts=false")
-                   .append("&count=" + 200);
-                break;
-            case SEARCH:
-                req.append("https://api.twitter.com/1.1/search/tweets.json")
-                   .append("?q=" + this.searchText)
-                   .append("&include_entities=true")
-                   .append("&result_type=recent")
-                   .append("&count=100");
-                break;
+        case ACCOUNT:
+            req.append("https://api.twitter.com/1.1/statuses/user_timeline.json")
+                    .append("?screen_name=" + this.accountName)
+                    .append("&include_entities=true")
+                    .append("&exclude_replies=true")
+                    .append("&trim_user=true")
+                    .append("&include_rts=false")
+                    .append("&count=" + 200);
+            break;
+        case SEARCH:
+            req.append("https://api.twitter.com/1.1/search/tweets.json")
+                    .append("?q=" + this.searchText)
+                    .append("&include_entities=true")
+                    .append("&result_type=recent")
+                    .append("&count=100");
+            break;
         }
         if (maxID > 0) {
             req.append("&max_id=" + Long.toString(maxID));
         }
         return req.toString();
     }
-    
+
     private List<JSONObject> getTweets(String url) throws IOException {
         List<JSONObject> tweets = new ArrayList<JSONObject>();
         logger.info(" Retrieving " + url);
         Document doc = Http.url(url)
-                          .ignoreContentType()
-                          .header("Authorization", "Bearer " + accessToken)
-                          .header("Content-Type", "application/x-www-form-urlencoded;charset=UTF-8")
-                          .header("User-agent", "ripe and zipe")
-                          .get();
+                .ignoreContentType()
+                .header("Authorization", "Bearer " + accessToken)
+                .header("Content-Type", "application/x-www-form-urlencoded;charset=UTF-8")
+                .header("User-agent", "ripe and zipe")
+                .get();
         String body = doc.body().html().replaceAll("&quot;", "\"");
         Object jsonObj = new JSONTokener(body).nextValue();
         JSONArray statuses;
@@ -168,47 +169,47 @@
         return tweets;
     }
 
-    private boolean parseTweet(JSONObject tweet) throws MalformedURLException {
-        if (!tweet.has("entities")) {
+    private int parseTweet(JSONObject tweet) throws MalformedURLException {
+        int parsedCount = 0;
+        if (!tweet.has("extended_entities")) {
             logger.error("XXX Tweet doesn't have entitites");
-            return false;
+            return 0;
         }
-        JSONObject entities = tweet.getJSONObject("entities");
+        JSONObject entities = tweet.getJSONObject("extended_entities");
         if (entities.has("media")) {
             JSONArray medias = entities.getJSONArray("media");
             String url;
             JSONObject media;
+
             for (int i = 0; i < medias.length(); i++) {
                 media = (JSONObject) medias.get(i);
                 url = media.getString("media_url");
-                if (url.contains(".twimg.com/")) {
-                    url += ":orig";
-                    addURLToDownload(new URL(url));
-                    return true;
-                }
-                else {
-                    logger.debug("Unexpected media_url: " + url);
+                if (media.getString("type").equals("video")) {
+                    JSONArray variants = media.getJSONObject("video_info").getJSONArray("variants");
+                    for (int j = 0; j < variants.length(); j++) {
+                        JSONObject variant = (JSONObject) variants.get(j);
+                        if (variant.has("bitrate") && variant.getInt("bitrate") == 832000) {
+                            addURLToDownload(new URL(variant.getString("url")));
+                            parsedCount++;
+                            break;
+                        }
+                    }
+                } else if (media.getString("type").equals("photo")) {
+                    if (url.contains(".twimg.com/")) {
+                        url += ":orig";
+                        addURLToDownload(new URL(url));
+                        parsedCount++;
+                    } else {
+                        logger.debug("Unexpected media_url: " + url);
+                    }
                 }
             }
         }
-        /*
-        if (entities.has("urls")) {
-            JSONArray urls = entities.getJSONArray("urls");
-            JSONObject url;
-            for (int i = 0; i < urls.length(); i++) {
-                url = (JSONObject) urls.get(i);
-                if (url.get("expanded_url") != null) {
-                    handleTweetedURL(url.getString("url"));
-                } else {
-                    handleTweetedURL(url.getString("expanded_url"));
-                }
-            }
-        }
-        */
-        return false;
+
+        return parsedCount;
     }
 
     @Override
@@ -216,12 +217,12 @@
         getAccessToken();
 
         switch (albumType) {
-            case ACCOUNT:
-                checkRateLimits("statuses", "/statuses/user_timeline");
-                break;
-            case SEARCH:
-                checkRateLimits("search", "/search/tweets");
-                break;
+        case ACCOUNT:
+            checkRateLimits("statuses", "/statuses/user_timeline");
+            break;
+        case SEARCH:
+            checkRateLimits("search", "/search/tweets");
+            break;
         }
 
         Long lastMaxID = 0L;
@@ -233,31 +234,29 @@
                 break;
             }
             logger.debug("Twitter response #" + (i + 1) + " Tweets:\n" + tweets);
-            if (tweets.size() == 1 && 
+            if (tweets.size() == 1 &&
                 lastMaxID.equals(tweets.get(0).getString("id_str"))
-               ) {
+            ) {
                 logger.info(" No more tweet found.");
                 break;
             }
 
             for (JSONObject tweet : tweets) {
                 lastMaxID = tweet.getLong("id");
-                if (parseTweet(tweet)) {
-                    parsedCount++;
-                }
-                if (isStopped() || (isThisATest() && parsedCount > 0) ) {
+                parsedCount += parseTweet(tweet);
+
+                if (isStopped() || (isThisATest() && parsedCount > 0)) {
                     break;
                 }
            }
 
-            if (isStopped() || (isThisATest() && parsedCount > 0) ) {
+            if (isStopped() || (isThisATest() && parsedCount > 0)) {
                 break;
             }
 
             try {
                 Thread.sleep(WAIT_TIME);
-            }
-            catch (InterruptedException e) {
+            } catch (InterruptedException e) {
                 logger.error("[!] Interrupted while waiting to load more results", e);
                 break;
             }
@@ -274,27 +273,27 @@
     @Override
     public String getGID(URL url) throws MalformedURLException {
         switch (albumType) {
-            case ACCOUNT:
-                return "account_" + accountName;
-            case SEARCH:
-                StringBuilder gid = new StringBuilder();
-                for (int i = 0; i < searchText.length(); i++) {
-                    char c = searchText.charAt(i);
-                    // Ignore URL-encoded chars
-                    if (c == '%') {
-                        gid.append('_');
-                        i += 2;
-                        continue;
-                    // Ignore non-alphanumeric chars
-                    } else if (
-                        (c >= 'a' && c <= 'z')
-                        || (c >= 'A' && c <= 'Z')
-                        || (c >= '0' && c <= '9')
-                    ) {
-                        gid.append(c);
+        case ACCOUNT:
+            return "account_" + accountName;
+        case SEARCH:
+            StringBuilder gid = new StringBuilder();
+            for (int i = 0; i < searchText.length(); i++) {
+                char c = searchText.charAt(i);
+                // Ignore URL-encoded chars
+                if (c == '%') {
+                    gid.append('_');
+                    i += 2;
+                    continue;
+                // Ignore non-alphanumeric chars
+                } else if (
+                        (c >= 'a' && c <= 'z')
+                        || (c >= 'A' && c <= 'Z')
+                        || (c >= '0' && c <= '9')
+                ) {
+                    gid.append(c);
+                }
             }
-                }
-                return "search_" + gid.toString();
+            return "search_" + gid.toString();
         }
         throw new MalformedURLException("Could not decide type of URL (search/account): " + url);
     }
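
For reference, the video branch above assumes the 1.1 API's extended_entities payload, where each media entry of type "video" carries a video_info.variants array, and the patch keeps the variant whose bitrate equals 832000. A minimal standalone sketch (not part of the patch; the class name, helper name, and sample JSON are illustrative) showing the same org.json calls, but picking the highest-bitrate variant instead:

import org.json.JSONArray;
import org.json.JSONObject;

public class TwitterVideoVariantExample {

    // Hypothetical helper, not part of the patch: given one "video" entry from
    // extended_entities.media, return the URL of the variant with the highest bitrate.
    // Assumes the 1.1 shape media.video_info.variants[], where MP4 variants carry
    // "bitrate" and "url"; variants without a bitrate (e.g. HLS playlists) are skipped.
    static String pickHighestBitrateVariant(JSONObject media) {
        JSONArray variants = media.getJSONObject("video_info").getJSONArray("variants");
        String bestUrl = null;
        int bestBitrate = -1;
        for (int j = 0; j < variants.length(); j++) {
            JSONObject variant = variants.getJSONObject(j);
            int bitrate = variant.optInt("bitrate", -1); // -1 when the variant has no bitrate
            if (bitrate > bestBitrate && variant.has("url")) {
                bestBitrate = bitrate;
                bestUrl = variant.getString("url");
            }
        }
        return bestUrl;
    }

    public static void main(String[] args) {
        // Illustrative payload mirroring the fields parseTweet() reads.
        String sampleMedia = "{\"type\":\"video\",\"video_info\":{\"variants\":["
                + "{\"content_type\":\"application/x-mpegURL\",\"url\":\"https://video.twimg.com/ext_tw_video/pl/sample.m3u8\"},"
                + "{\"bitrate\":320000,\"content_type\":\"video/mp4\",\"url\":\"https://video.twimg.com/ext_tw_video/sample/320x180.mp4\"},"
                + "{\"bitrate\":832000,\"content_type\":\"video/mp4\",\"url\":\"https://video.twimg.com/ext_tw_video/sample/640x360.mp4\"}]}}";
        System.out.println(pickHighestBitrateVariant(new JSONObject(sampleMedia)));
        // Prints the 832000-bps URL, the same variant the patch targets for this payload.
    }
}

Selecting the maximum available bitrate rather than a fixed 832000 would also cover tweets whose MP4 renditions are encoded at other bitrates, which the patch's equality check skips.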