Merge pull request #450 from cyian-1756/san-url-history
Added the normalizeUrl function, which allows a ripper to normalize a URL before adding it to the URL history or checking whether it is already in the URL history. The Instagram ripper now uses this function.
This commit is contained in:
commit
543d954941
@ -61,7 +61,13 @@ public abstract class AbstractRipper
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds a URL to the url history file
|
||||
* @param downloadedURL URL to add to the url history file
|
||||
*/
|
||||
private void writeDownloadedURL(String downloadedURL) throws IOException {
|
||||
downloadedURL = normalizeUrl(downloadedURL);
|
||||
BufferedWriter bw = null;
|
||||
FileWriter fw = null;
|
||||
try {
|
||||
@ -87,6 +93,15 @@ public abstract class AbstractRipper
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Normalizes a URL before it is used with the URL history.
 * <p>
 * Both writeDownloadedURL and hasDownloadedURL pass their URL through this
 * method first, so the same normalization is applied when a URL is recorded
 * and when it is looked up.
 * <p>
 * This base implementation returns the URL unchanged. A ripper can override
 * it to strip parts of the URL that should not affect history matching.
 *
 * @param url URL to normalize
 * @return the normalized URL; here, {@code url} exactly as it was passed in
 */
|
||||
public String normalizeUrl(String url) {
|
||||
return url;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks to see if Ripme has already downloaded a URL
|
||||
* @param url URL to check if downloaded
|
||||
@ -96,6 +111,7 @@ public abstract class AbstractRipper
|
||||
*/
|
||||
private boolean hasDownloadedURL(String url) {
|
||||
File file = new File(URLHistoryFile);
|
||||
url = normalizeUrl(url);
|
||||
try {
|
||||
Scanner scanner = new Scanner(file);
|
||||
while (scanner.hasNextLine()) {
|
||||
|
@ -52,6 +52,12 @@ public class InstagramRipper extends AbstractHTMLRipper {
|
||||
return san_url;
|
||||
}
|
||||
|
||||
/**
 * Normalizes a URL by removing what the original author calls the "date sig".
 * <p>
 * Every occurrence of a slash, followed by exactly eight characters drawn
 * from {@code A-Z} and {@code 0-9}, followed by another slash, is replaced
 * with a single slash. Lowercase characters are not matched.
 * <p>
 * NOTE(review): each match consumes both of its surrounding slashes, so when
 * two matching segments are directly adjacent only the first one is removed.
 * Confirm whether such URLs can occur.
 * <p>
 * NOTE(review): the pattern matches any eight-character uppercase/digit path
 * segment, not only signatures. Verify against real URLs.
 *
 * @param url URL to normalize; must not be null, because replaceAll is
 *            invoked on it directly
 * @return {@code url} with each matching segment collapsed to a single slash
 */
@Override
|
||||
public String normalizeUrl(String url) {
|
||||
// Remove the date sig from the url: collapse each "/XXXXXXXX/" segment to "/"
|
||||
return url.replaceAll("/[A-Z0-9]{8}/", "/");
|
||||
}
|
||||
|
||||
private List<String> getPostsFromSinglePage(Document Doc) {
|
||||
List<String> imageURLs = new ArrayList<>();
|
||||
JSONArray datas;
|
||||
|
Loading…
Reference in New Issue
Block a user