Merge pull request #450 from cyian-1756/san-url-history

Added the normalizeUrl func, which allows a ripper to normalize a URL before adding it to the URL history or checking whether it is already in the URL history. The Instagram ripper now uses this func.
2018-03-06 06:47:41 -05:00 · 2018-03-06 06:47:41 -05:00 · 543d954941
commit 543d954941
parent 9f922e7965 48ffcf68d3
2 changed files with 22 additions and 0 deletions
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
@ -61,7 +61,13 @@ public abstract class AbstractRipper
        }
    }
    /**
     * Adds a URL to the url history file
     * @param downloadedURL URL to check if downloaded
     */
    private void writeDownloadedURL(String downloadedURL) throws IOException {
        downloadedURL = normalizeUrl(downloadedURL);
        BufferedWriter bw = null;
        FileWriter fw = null;
        try {
@ -87,6 +93,15 @@ public abstract class AbstractRipper
        }
    }
    /**
     * Normalizes a URL before it is written to, or looked up in, the URL history file.
     * <p>
     * This base implementation returns the URL unchanged. Rippers may override it to
     * strip parts of a URL that should not affect history matching.
     *
     * @param url URL to normalize
     * @return the normalized URL; in this base implementation, {@code url} itself
     */
    public String normalizeUrl(String url) {
        return url;
    }
    /**
     * Checks to see if Ripme has already downloaded a URL
     * @param url URL to check if downloaded
@ -96,6 +111,7 @@ public abstract class AbstractRipper
     */
    private boolean hasDownloadedURL(String url) {
        File file = new File(URLHistoryFile);
        url = normalizeUrl(url);
        try {
            Scanner scanner = new Scanner(file);
            while (scanner.hasNextLine()) {
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
@ -52,6 +52,12 @@ public class InstagramRipper extends AbstractHTMLRipper {
        return san_url;
    }
    /**
     * Strips the date-signature path segment from a URL so that the same
     * resource compares equal in the URL history regardless of its signature.
     *
     * @param url URL to normalize
     * @return {@code url} with every {@code /XXXXXXXX/} segment (eight characters
     *         drawn from A-Z and 0-9) collapsed to a single {@code /}
     */
    @Override
    public String normalizeUrl(String url) {
        // Regex for the date sig: one path segment of exactly eight upper-case alphanumerics.
        final String dateSigSegment = "/[A-Z0-9]{8}/";
        final String separator = "/";
        String withoutDateSig = url.replaceAll(dateSigSegment, separator);
        return withoutDateSig;
    }
    private List<String> getPostsFromSinglePage(Document Doc) {
        List<String> imageURLs = new ArrayList<>();
        JSONArray datas;