Twitter: added support for video and multiple-image tweets (#478)

torbica 2017-05-09 23:42:14 +02:00 committed by metaprime
parent a38597d6fe
commit 6ca4ebd176


@@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Utils;
 public class TwitterRipper extends AlbumRipper {
     private static final String DOMAIN = "twitter.com",
             HOST = "twitter";

     private static final int MAX_REQUESTS = Utils.getConfigInteger("twitter.max_requests", 10);
     private static final int WAIT_TIME = 2000;
@@ -34,6 +34,7 @@ public class TwitterRipper extends AlbumRipper {
         ACCOUNT,
         SEARCH
     }

     private ALBUM_TYPE albumType;
     private String searchText, accountName;
@@ -72,12 +73,12 @@ public class TwitterRipper extends AlbumRipper {
     private void getAccessToken() throws IOException {
         Document doc = Http.url("https://api.twitter.com/oauth2/token")
                 .ignoreContentType()
                 .header("Authorization", "Basic " + authKey)
                 .header("Content-Type", "application/x-www-form-urlencoded;charset=UTF-8")
                 .header("User-agent", "ripe and zipe")
                 .data("grant_type", "client_credentials")
                 .post();
         String body = doc.body().html().replaceAll("&quot;", "\"");
         try {
             JSONObject json = new JSONObject(body);
@@ -91,17 +92,17 @@ public class TwitterRipper extends AlbumRipper {
     private void checkRateLimits(String resource, String api) throws IOException {
         Document doc = Http.url("https://api.twitter.com/1.1/application/rate_limit_status.json?resources=" + resource)
                 .ignoreContentType()
                 .header("Authorization", "Bearer " + accessToken)
                 .header("Content-Type", "application/x-www-form-urlencoded;charset=UTF-8")
                 .header("User-agent", "ripe and zipe")
                 .get();
         String body = doc.body().html().replaceAll("&quot;", "\"");
         try {
             JSONObject json = new JSONObject(body);
             JSONObject stats = json.getJSONObject("resources")
                     .getJSONObject(resource)
                     .getJSONObject(api);
             int remaining = stats.getInt("remaining");
             logger.info(" Twitter " + resource + " calls remaining: " + remaining);
             if (remaining < 20) {
@@ -117,22 +118,22 @@ public class TwitterRipper extends AlbumRipper {
     private String getApiURL(Long maxID) {
         StringBuilder req = new StringBuilder();
         switch (albumType) {
         case ACCOUNT:
             req.append("https://api.twitter.com/1.1/statuses/user_timeline.json")
                .append("?screen_name=" + this.accountName)
                .append("&include_entities=true")
                .append("&exclude_replies=true")
                .append("&trim_user=true")
                .append("&include_rts=false")
                .append("&count=" + 200);
             break;
         case SEARCH:
             req.append("https://api.twitter.com/1.1/search/tweets.json")
                .append("?q=" + this.searchText)
                .append("&include_entities=true")
                .append("&result_type=recent")
                .append("&count=100");
             break;
         }
         if (maxID > 0) {
             req.append("&max_id=" + Long.toString(maxID));
@@ -144,11 +145,11 @@ public class TwitterRipper extends AlbumRipper {
         List<JSONObject> tweets = new ArrayList<JSONObject>();
         logger.info(" Retrieving " + url);
         Document doc = Http.url(url)
                 .ignoreContentType()
                 .header("Authorization", "Bearer " + accessToken)
                 .header("Content-Type", "application/x-www-form-urlencoded;charset=UTF-8")
                 .header("User-agent", "ripe and zipe")
                 .get();
         String body = doc.body().html().replaceAll("&quot;", "\"");
         Object jsonObj = new JSONTokener(body).nextValue();
         JSONArray statuses;
@@ -168,47 +169,47 @@ public class TwitterRipper extends AlbumRipper {
         return tweets;
     }

-    private boolean parseTweet(JSONObject tweet) throws MalformedURLException {
-        if (!tweet.has("entities")) {
+    private int parseTweet(JSONObject tweet) throws MalformedURLException {
+        int parsedCount = 0;
+        if (!tweet.has("extended_entities")) {
             logger.error("XXX Tweet doesn't have entitites");
-            return false;
+            return 0;
         }
-        JSONObject entities = tweet.getJSONObject("entities");
+        JSONObject entities = tweet.getJSONObject("extended_entities");
         if (entities.has("media")) {
             JSONArray medias = entities.getJSONArray("media");
             String url;
             JSONObject media;
             for (int i = 0; i < medias.length(); i++) {
                 media = (JSONObject) medias.get(i);
                 url = media.getString("media_url");
-                if (url.contains(".twimg.com/")) {
-                    url += ":orig";
-                    addURLToDownload(new URL(url));
-                    return true;
-                }
-                else {
-                    logger.debug("Unexpected media_url: " + url);
+                if (media.getString("type").equals("video")) {
+                    JSONArray variants = media.getJSONObject("video_info").getJSONArray("variants");
+                    for (int j = 0; j < medias.length(); j++) {
+                        JSONObject variant = (JSONObject) variants.get(i);
+                        if (variant.has("bitrate") && variant.getInt("bitrate") == 832000) {
+                            addURLToDownload(new URL(variant.getString("url")));
+                            parsedCount++;
+                            break;
+                        }
+                    }
+                } else if (media.getString("type").equals("photo")) {
+                    if (url.contains(".twimg.com/")) {
+                        url += ":orig";
+                        addURLToDownload(new URL(url));
+                        parsedCount++;
+                    } else {
+                        logger.debug("Unexpected media_url: " + url);
+                    }
                 }
             }
         }
-        /*
-        if (entities.has("urls")) {
-            JSONArray urls = entities.getJSONArray("urls");
-            JSONObject url;
-            for (int i = 0; i < urls.length(); i++) {
-                url = (JSONObject) urls.get(i);
-                if (url.get("expanded_url") != null) {
-                    handleTweetedURL(url.getString("url"));
-                } else {
-                    handleTweetedURL(url.getString("expanded_url"));
-                }
-            }
-        }
-        */
-        return false;
+        return parsedCount;
     }

     @Override
@@ -216,12 +217,12 @@ public class TwitterRipper extends AlbumRipper {
         getAccessToken();

         switch (albumType) {
         case ACCOUNT:
             checkRateLimits("statuses", "/statuses/user_timeline");
             break;
         case SEARCH:
             checkRateLimits("search", "/search/tweets");
             break;
         }

         Long lastMaxID = 0L;
@@ -235,29 +236,27 @@ public class TwitterRipper extends AlbumRipper {
             logger.debug("Twitter response #" + (i + 1) + " Tweets:\n" + tweets);
             if (tweets.size() == 1 &&
                     lastMaxID.equals(tweets.get(0).getString("id_str"))
                 ) {
                 logger.info(" No more tweet found.");
                 break;
             }
             for (JSONObject tweet : tweets) {
                 lastMaxID = tweet.getLong("id");
-                if (parseTweet(tweet)) {
-                    parsedCount++;
-                }
-                if (isStopped() || (isThisATest() && parsedCount > 0) ) {
+                parsedCount += parseTweet(tweet);
+                if (isStopped() || (isThisATest() && parsedCount > 0)) {
                     break;
                 }
             }
-            if (isStopped() || (isThisATest() && parsedCount > 0) ) {
+            if (isStopped() || (isThisATest() && parsedCount > 0)) {
                 break;
             }
             try {
                 Thread.sleep(WAIT_TIME);
-            }
-            catch (InterruptedException e) {
+            } catch (InterruptedException e) {
                 logger.error("[!] Interrupted while waiting to load more results", e);
                 break;
             }
@@ -274,27 +273,27 @@ public class TwitterRipper extends AlbumRipper {
     @Override
     public String getGID(URL url) throws MalformedURLException {
         switch (albumType) {
         case ACCOUNT:
             return "account_" + accountName;
         case SEARCH:
             StringBuilder gid = new StringBuilder();
             for (int i = 0; i < searchText.length(); i++) {
                 char c = searchText.charAt(i);
                 // Ignore URL-encoded chars
                 if (c == '%') {
                     gid.append('_');
                     i += 2;
                     continue;
                 // Ignore non-alphanumeric chars
                 } else if (
                         (c >= 'a' && c <= 'z')
                         || (c >= 'A' && c <= 'Z')
                         || (c >= '0' && c <= '9')
                         ) {
                     gid.append(c);
                 }
             }
             return "search_" + gid.toString();
         }
         throw new MalformedURLException("Could not decide type of URL (search/account): " + url);
     }
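For reference, below is a minimal standalone sketch of the media-extraction approach the new parseTweet() uses: walk a tweet's extended_entities.media array, take photos at ":orig" resolution, and pick an MP4 URL from video_info.variants. It is illustrative only, not code from this commit: the class name, helper names, and sample JSON are invented for the example, and it picks the highest-bitrate MP4 variant rather than matching the fixed 832000 bitrate the commit uses.

// Illustrative sketch only -- not part of commit 6ca4ebd176.
// Shows how downloadable URLs can be pulled out of a tweet's
// "extended_entities" block, the JSON structure the new parseTweet() reads.
import java.util.ArrayList;
import java.util.List;

import org.json.JSONArray;
import org.json.JSONObject;

public class ExtendedEntitiesSketch {

    // Collect downloadable URLs for every photo and video in one tweet.
    static List<String> mediaUrls(JSONObject tweet) {
        List<String> urls = new ArrayList<String>();
        if (!tweet.has("extended_entities")) {
            return urls;
        }
        JSONArray medias = tweet.getJSONObject("extended_entities").getJSONArray("media");
        for (int i = 0; i < medias.length(); i++) {
            JSONObject media = medias.getJSONObject(i);
            String type = media.getString("type");
            if (type.equals("photo")) {
                // ":orig" asks Twitter's image host for the original resolution.
                urls.add(media.getString("media_url") + ":orig");
            } else if (type.equals("video") || type.equals("animated_gif")) {
                String best = bestVideoVariant(media.getJSONObject("video_info"));
                if (best != null) {
                    urls.add(best);
                }
            }
        }
        return urls;
    }

    // Pick the highest-bitrate MP4 variant (the commit instead matches a
    // fixed bitrate of 832000).
    static String bestVideoVariant(JSONObject videoInfo) {
        JSONArray variants = videoInfo.getJSONArray("variants");
        String bestUrl = null;
        int bestBitrate = -1;
        for (int i = 0; i < variants.length(); i++) {
            JSONObject variant = variants.getJSONObject(i);
            if (!"video/mp4".equals(variant.optString("content_type"))) {
                continue; // skip HLS playlists and other non-MP4 entries
            }
            int bitrate = variant.optInt("bitrate", 0);
            if (bitrate > bestBitrate) {
                bestBitrate = bitrate;
                bestUrl = variant.getString("url");
            }
        }
        return bestUrl;
    }

    public static void main(String[] args) {
        // Hypothetical tweet fragment matching the API shape the ripper consumes.
        String json = "{\"extended_entities\":{\"media\":["
                + "{\"type\":\"photo\",\"media_url\":\"http://pbs.twimg.com/media/abc.jpg\"},"
                + "{\"type\":\"video\",\"media_url\":\"http://pbs.twimg.com/x.jpg\","
                + "\"video_info\":{\"variants\":["
                + "{\"content_type\":\"application/x-mpegURL\",\"url\":\"http://video.twimg.com/pl.m3u8\"},"
                + "{\"content_type\":\"video/mp4\",\"bitrate\":320000,\"url\":\"http://video.twimg.com/low.mp4\"},"
                + "{\"content_type\":\"video/mp4\",\"bitrate\":832000,\"url\":\"http://video.twimg.com/high.mp4\"}]}}]}}";
        System.out.println(mediaUrls(new JSONObject(json)));
        // -> [http://pbs.twimg.com/media/abc.jpg:orig, http://video.twimg.com/high.mp4]
    }
}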