Added support for ripping from Instagram hashtag pages (instagram.com/explore/tags/<tag>)

This commit is contained in:
cyian-1756 2017-11-18 07:22:25 -05:00
parent 67db5f3d99
commit e8185aaf3e

View File

@ -62,6 +62,13 @@ public class InstagramRipper extends AbstractHTMLRipper {
if (m.matches()) { if (m.matches()) {
return m.group(1); return m.group(1);
} }
p = Pattern.compile("^https?://www.instagram.com/explore/tags/([^/]+)/?");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
throw new MalformedURLException("Unable to find user in " + url); throw new MalformedURLException("Unable to find user in " + url);
} }
@ -134,11 +141,18 @@ public class InstagramRipper extends AbstractHTMLRipper {
logger.warn("Unable to exact json from page"); logger.warn("Unable to exact json from page");
} }
Pattern p = Pattern.compile("^.*instagram\\.com/([a-zA-Z0-9\\-_.]+)/?"); Pattern p = Pattern.compile("^.*instagram.com/p/([a-zA-Z0-9\\-_.]+)/?");
Matcher m = p.matcher(url.toExternalForm()); Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) { if (!m.matches()) {
JSONArray datas = new JSONArray();
try {
JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage"); JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage");
JSONArray datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes"); datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes");
} catch (JSONException e) {
// Handle hashtag pages
datas = json.getJSONObject("entry_data").getJSONArray("TagPage").getJSONObject(0)
.getJSONObject("tag").getJSONObject("media").getJSONArray("nodes");
}
for (int i = 0; i < datas.length(); i++) { for (int i = 0; i < datas.length(); i++) {
JSONObject data = (JSONObject) datas.get(i); JSONObject data = (JSONObject) datas.get(i);
Long epoch = data.getLong("date"); Long epoch = data.getLong("date");
@ -168,6 +182,16 @@ public class InstagramRipper extends AbstractHTMLRipper {
} }
// Rip the next page // Rip the next page
if (!nextPageID.equals("") && !isThisATest()) { if (!nextPageID.equals("") && !isThisATest()) {
if (url.toExternalForm().contains("/tags/")) {
try {
// Sleep for a while to avoid a ban
sleep(2500);
getURLsFromPage(Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get());
} catch (IOException e) {
return imageURLs;
}
}
try { try {
// Sleep for a while to avoid a ban // Sleep for a while to avoid a ban
sleep(2500); sleep(2500);
@ -177,6 +201,7 @@ public class InstagramRipper extends AbstractHTMLRipper {
} }
} }
} else { // We're ripping from a single page } else { // We're ripping from a single page
logger.info("Ripping from single page");
if (!doc.select("meta[property=og:video]").attr("content").equals("")) { if (!doc.select("meta[property=og:video]").attr("content").equals("")) {
String videoURL = doc.select("meta[property=og:video]").attr("content"); String videoURL = doc.select("meta[property=og:video]").attr("content");
// We're ripping a page with a video on it // We're ripping a page with a video on it