Merge pull request #222 from cyian-1756/ig_hashtag
Added support for ripping from Instagram (IG) hashtag pages
This commit is contained in:
commit
ecf72e517e
@ -69,6 +69,13 @@ public class InstagramRipper extends AbstractHTMLRipper {
|
|||||||
if (m.matches()) {
|
if (m.matches()) {
|
||||||
return m.group(1);
|
return m.group(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
p = Pattern.compile("^https?://www.instagram.com/explore/tags/([^/]+)/?");
|
||||||
|
m = p.matcher(url.toExternalForm());
|
||||||
|
if (m.matches()) {
|
||||||
|
return m.group(1);
|
||||||
|
}
|
||||||
|
|
||||||
throw new MalformedURLException("Unable to find user in " + url);
|
throw new MalformedURLException("Unable to find user in " + url);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -141,11 +148,18 @@ public class InstagramRipper extends AbstractHTMLRipper {
|
|||||||
logger.warn("Unable to exact json from page");
|
logger.warn("Unable to exact json from page");
|
||||||
}
|
}
|
||||||
|
|
||||||
Pattern p = Pattern.compile("^.*instagram\\.com/([a-zA-Z0-9\\-_.]+)/?");
|
Pattern p = Pattern.compile("^.*instagram.com/p/([a-zA-Z0-9\\-_.]+)/?");
|
||||||
Matcher m = p.matcher(url.toExternalForm());
|
Matcher m = p.matcher(url.toExternalForm());
|
||||||
if (m.matches()) {
|
if (!m.matches()) {
|
||||||
|
JSONArray datas = new JSONArray();
|
||||||
|
try {
|
||||||
JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage");
|
JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage");
|
||||||
JSONArray datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes");
|
datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes");
|
||||||
|
} catch (JSONException e) {
|
||||||
|
// Handle hashtag pages
|
||||||
|
datas = json.getJSONObject("entry_data").getJSONArray("TagPage").getJSONObject(0)
|
||||||
|
.getJSONObject("tag").getJSONObject("media").getJSONArray("nodes");
|
||||||
|
}
|
||||||
for (int i = 0; i < datas.length(); i++) {
|
for (int i = 0; i < datas.length(); i++) {
|
||||||
JSONObject data = (JSONObject) datas.get(i);
|
JSONObject data = (JSONObject) datas.get(i);
|
||||||
Long epoch = data.getLong("date");
|
Long epoch = data.getLong("date");
|
||||||
@ -175,6 +189,21 @@ public class InstagramRipper extends AbstractHTMLRipper {
|
|||||||
}
|
}
|
||||||
// Rip the next page
|
// Rip the next page
|
||||||
if (!nextPageID.equals("") && !isThisATest()) {
|
if (!nextPageID.equals("") && !isThisATest()) {
|
||||||
|
if (url.toExternalForm().contains("/tags/")) {
|
||||||
|
try {
|
||||||
|
// Sleep for a while to avoid a ban
|
||||||
|
sleep(2500);
|
||||||
|
if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) {
|
||||||
|
getURLsFromPage(Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get());
|
||||||
|
} else {
|
||||||
|
getURLsFromPage(Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get());
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (IOException e) {
|
||||||
|
return imageURLs;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
// Sleep for a while to avoid a ban
|
// Sleep for a while to avoid a ban
|
||||||
sleep(2500);
|
sleep(2500);
|
||||||
@ -182,8 +211,11 @@ public class InstagramRipper extends AbstractHTMLRipper {
|
|||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
return imageURLs;
|
return imageURLs;
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
logger.warn("Can't get net page");
|
||||||
}
|
}
|
||||||
} else { // We're ripping from a single page
|
} else { // We're ripping from a single page
|
||||||
|
logger.info("Ripping from single page");
|
||||||
if (!doc.select("meta[property=og:video]").attr("content").equals("")) {
|
if (!doc.select("meta[property=og:video]").attr("content").equals("")) {
|
||||||
String videoURL = doc.select("meta[property=og:video]").attr("content");
|
String videoURL = doc.select("meta[property=og:video]").attr("content");
|
||||||
// We're ripping a page with a video on it
|
// We're ripping a page with a video on it
|
||||||
|
Loading…
Reference in New Issue
Block a user