diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java index edcff83d..ff6b4102 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java @@ -61,7 +61,13 @@ public abstract class AbstractRipper } } + + /** + * Adds a URL to the url history file + * @param downloadedURL URL to check if downloaded + */ private void writeDownloadedURL(String downloadedURL) throws IOException { + downloadedURL = normalizeUrl(downloadedURL); BufferedWriter bw = null; FileWriter fw = null; try { @@ -86,6 +92,15 @@ public abstract class AbstractRipper } } } + + + /** + * Normalize a URL + * @param url URL to check if downloaded + */ + public String normalizeUrl(String url) { + return url; + } /** * Checks to see if Ripme has already downloaded a URL @@ -96,6 +111,7 @@ public abstract class AbstractRipper */ private boolean hasDownloadedURL(String url) { File file = new File(URLHistoryFile); + url = normalizeUrl(url); try { Scanner scanner = new Scanner(file); while (scanner.hasNextLine()) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index 076fcfc6..d1f16535 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -52,6 +52,12 @@ public class InstagramRipper extends AbstractHTMLRipper { return san_url; } + @Override + public String normalizeUrl(String url) { + // Remove the date sig from the url + return url.replaceAll("/[A-Z0-9]{8}/", "/"); + } + private List getPostsFromSinglePage(Document Doc) { List imageURLs = new ArrayList<>(); JSONArray datas;