Merge pull request #504 from cyian-1756/ig-nextpage-fix
Ig nextpage fix
This commit is contained in:
commit
d5e89d90c6
@ -3,6 +3,7 @@ package com.rarchives.ripme.ripper.rippers;
|
|||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.net.URLConnection;
|
import java.net.URLConnection;
|
||||||
@ -12,6 +13,7 @@ import java.util.ArrayList;
|
|||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
import java.security.*;
|
||||||
|
|
||||||
import org.json.JSONArray;
|
import org.json.JSONArray;
|
||||||
import org.json.JSONException;
|
import org.json.JSONException;
|
||||||
@ -20,6 +22,7 @@ import org.json.JSONObject;
|
|||||||
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||||
import com.rarchives.ripme.utils.Http;
|
import com.rarchives.ripme.utils.Http;
|
||||||
|
|
||||||
|
import org.jsoup.Connection;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
import com.rarchives.ripme.ui.RipStatusMessage;
|
import com.rarchives.ripme.ui.RipStatusMessage;
|
||||||
@ -34,6 +37,10 @@ public class InstagramRipper extends AbstractHTMLRipper {
|
|||||||
private String tagName;
|
private String tagName;
|
||||||
|
|
||||||
private String userID;
|
private String userID;
|
||||||
|
private String rhx_gis = null;
|
||||||
|
private String csrftoken;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
public InstagramRipper(URL url) throws IOException {
|
public InstagramRipper(URL url) throws IOException {
|
||||||
super(url);
|
super(url);
|
||||||
@ -178,7 +185,10 @@ public class InstagramRipper extends AbstractHTMLRipper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Document getFirstPage() throws IOException {
|
public Document getFirstPage() throws IOException {
|
||||||
Document p = Http.url(url).get();
|
Connection.Response resp = Http.url(url).response();
|
||||||
|
logger.info(resp.cookies());
|
||||||
|
csrftoken = resp.cookie("csrftoken");
|
||||||
|
Document p = resp.parse();
|
||||||
// Get the query hash so we can download the next page
|
// Get the query hash so we can download the next page
|
||||||
qHash = getQHash(p);
|
qHash = getQHash(p);
|
||||||
return p;
|
return p;
|
||||||
@ -234,7 +244,10 @@ public class InstagramRipper extends AbstractHTMLRipper {
|
|||||||
logger.warn("Unable to exact json from page");
|
logger.warn("Unable to exact json from page");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// get the rhx_gis value so we can get the next page later on
|
||||||
|
if (rhx_gis == null) {
|
||||||
|
rhx_gis = json.getString("rhx_gis");
|
||||||
|
}
|
||||||
if (!url.toExternalForm().contains("/p/")) {
|
if (!url.toExternalForm().contains("/p/")) {
|
||||||
JSONArray datas = new JSONArray();
|
JSONArray datas = new JSONArray();
|
||||||
if (!rippingTag) {
|
if (!rippingTag) {
|
||||||
@ -314,18 +327,41 @@ public class InstagramRipper extends AbstractHTMLRipper {
|
|||||||
return imageURLs;
|
return imageURLs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private String getIGGis(String variables) {
|
||||||
|
String stringToMD5 = rhx_gis + ":" + csrftoken + ":" + USER_AGENT + ":" + variables;
|
||||||
|
logger.debug("String to md5 is \"" + stringToMD5 + "\"");
|
||||||
|
try {
|
||||||
|
byte[] bytesOfMessage = stringToMD5.getBytes("UTF-8");
|
||||||
|
|
||||||
|
MessageDigest md = MessageDigest.getInstance("MD5");
|
||||||
|
byte[] hash = md.digest(bytesOfMessage);
|
||||||
|
StringBuffer sb = new StringBuffer();
|
||||||
|
for (int i = 0; i < hash.length; ++i) {
|
||||||
|
sb.append(Integer.toHexString((hash[i] & 0xFF) | 0x100).substring(1,3));
|
||||||
|
}
|
||||||
|
return sb.toString();
|
||||||
|
} catch(UnsupportedEncodingException e) {
|
||||||
|
return null;
|
||||||
|
} catch(NoSuchAlgorithmException e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Document getNextPage(Document doc) throws IOException {
|
public Document getNextPage(Document doc) throws IOException {
|
||||||
Document toreturn;
|
Document toreturn;
|
||||||
java.util.Map<String, String> cookies = new HashMap<String, String>();
|
java.util.Map<String, String> cookies = new HashMap<String, String>();
|
||||||
// This shouldn't be hardcoded and will break one day
|
// This shouldn't be hardcoded and will break one day
|
||||||
cookies.put("ig_pr", "1");
|
cookies.put("ig_pr", "1");
|
||||||
|
cookies.put("csrftoken", csrftoken);
|
||||||
if (!nextPageID.equals("") && !isThisATest()) {
|
if (!nextPageID.equals("") && !isThisATest()) {
|
||||||
if (rippingTag) {
|
if (rippingTag) {
|
||||||
try {
|
try {
|
||||||
sleep(2500);
|
sleep(2500);
|
||||||
|
String vars = "{\"tag_name\":\"" + tagName + "\",\"first\":4,\"after\":\"" + nextPageID + "\"}";
|
||||||
|
String ig_gis = getIGGis(vars);
|
||||||
toreturn = Http.url("https://www.instagram.com/graphql/query/?query_hash=" + qHash +
|
toreturn = Http.url("https://www.instagram.com/graphql/query/?query_hash=" + qHash +
|
||||||
"&variables={\"tag_name\":\"" + tagName + "\",\"first\":4,\"after\":\"" + nextPageID + "\"}").cookies(cookies).ignoreContentType().get();
|
"&variables=" + vars).header("x-instagram-gis", ig_gis).cookies(cookies).ignoreContentType().get();
|
||||||
// Sleep for a while to avoid a ban
|
// Sleep for a while to avoid a ban
|
||||||
logger.info(toreturn.html());
|
logger.info(toreturn.html());
|
||||||
return toreturn;
|
return toreturn;
|
||||||
@ -338,8 +374,11 @@ public class InstagramRipper extends AbstractHTMLRipper {
|
|||||||
try {
|
try {
|
||||||
// Sleep for a while to avoid a ban
|
// Sleep for a while to avoid a ban
|
||||||
sleep(2500);
|
sleep(2500);
|
||||||
toreturn = Http.url("https://www.instagram.com/graphql/query/?query_hash=" + qHash + "&variables=" +
|
String vars = "{\"id\":\"" + userID + "\",\"first\":100,\"after\":\"" + nextPageID + "\"}";
|
||||||
"{\"id\":\"" + userID + "\",\"first\":100,\"after\":\"" + nextPageID + "\"}").cookies(cookies).ignoreContentType().get();
|
String ig_gis = getIGGis(vars);
|
||||||
|
logger.info(ig_gis);
|
||||||
|
toreturn = Http.url("https://www.instagram.com/graphql/query/?query_hash=" + qHash + "&variables=" + vars
|
||||||
|
).header("x-instagram-gis", ig_gis).cookies(cookies).ignoreContentType().get();
|
||||||
if (!pageHasImages(toreturn)) {
|
if (!pageHasImages(toreturn)) {
|
||||||
throw new IOException("No more pages");
|
throw new IOException("No more pages");
|
||||||
}
|
}
|
||||||
@ -358,7 +397,6 @@ public class InstagramRipper extends AbstractHTMLRipper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private boolean pageHasImages(Document doc) {
|
private boolean pageHasImages(Document doc) {
|
||||||
logger.info("BAD DATA: " + stripHTMLTags(doc.html()));
|
|
||||||
JSONObject json = new JSONObject(stripHTMLTags(doc.html()));
|
JSONObject json = new JSONObject(stripHTMLTags(doc.html()));
|
||||||
int numberOfImages = json.getJSONObject("data").getJSONObject("user")
|
int numberOfImages = json.getJSONObject("data").getJSONObject("user")
|
||||||
.getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges").length();
|
.getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges").length();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user