Filter out duplicate files from API results by using their MD5 hash
This commit is contained in:
parent
d179895318
commit
6d22a5f579
@ -1,5 +1,7 @@
|
||||
package de.gurkengewuerz.ripmewrapper;
|
||||
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.nio.file.Files;
|
||||
@ -44,13 +46,19 @@ public class ImageCrawler extends TimerTask {
|
||||
}).map(String::valueOf)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
PreparedStatement ps = connection.prepareStatement("INSERT INTO filelist VALUES (NULL, ?, ?)");
|
||||
PreparedStatement ps = connection.prepareStatement("INSERT INTO filelist VALUES (NULL, ?, ?, ?)");
|
||||
HashSet<String> foundIds = new HashSet<>();
|
||||
for (String s : pathList) {
|
||||
String id = s.substring(s.lastIndexOf(File.separator) + 1).split("-")[0];
|
||||
if (!findIds.contains(id)) continue;
|
||||
|
||||
FileInputStream fis = new FileInputStream(new File(s));
|
||||
String md5 = DigestUtils.md5Hex(fis);
|
||||
fis.close();
|
||||
|
||||
ps.setString(1, id);
|
||||
ps.setString(2, s);
|
||||
ps.setString(3, md5);
|
||||
ps.executeUpdate();
|
||||
foundIds.add(id);
|
||||
}
|
||||
|
@ -30,7 +30,7 @@ public class Webserver {
|
||||
statement.setQueryTimeout(30); // set timeout to 30 sec.
|
||||
|
||||
statement.executeUpdate("CREATE TABLE IF NOT EXISTS metalist (id string, subreddit string, created integer, title string, file string)");
|
||||
statement.executeUpdate("CREATE TABLE IF NOT EXISTS filelist (iid INTEGER PRIMARY KEY AUTOINCREMENT, id string, path string)");
|
||||
statement.executeUpdate("CREATE TABLE IF NOT EXISTS filelist (iid INTEGER PRIMARY KEY AUTOINCREMENT, id string, path string, md5 string)");
|
||||
connection.close();
|
||||
|
||||
|
||||
|
@ -61,7 +61,7 @@ public class APIHandler extends AbstractHandler {
|
||||
|
||||
if (offset != -1) {
|
||||
PreparedStatement psPre = connection.prepareStatement(
|
||||
"SELECT iid FROM filelist LEFT JOIN metalist ON filelist.id = metalist.id WHERE subreddit IN (" + builderString + ") ORDER BY created DESC, iid DESC"
|
||||
"SELECT iid FROM filelist LEFT JOIN metalist ON filelist.id = metalist.id WHERE subreddit IN (" + builderString + ") GROUP BY md5 ORDER BY created DESC, iid DESC"
|
||||
);
|
||||
|
||||
for (String o : subreddits) {
|
||||
@ -82,7 +82,7 @@ public class APIHandler extends AbstractHandler {
|
||||
// ----------------
|
||||
|
||||
PreparedStatement ps = connection.prepareStatement(
|
||||
"SELECT iid, metalist.id, subreddit, created, path FROM filelist LEFT JOIN metalist ON filelist.id = metalist.id WHERE subreddit IN (" + builderString + ") ORDER BY created DESC, iid DESC LIMIT 10 OFFSET ?"
|
||||
"SELECT iid, metalist.id, subreddit, created, path FROM filelist LEFT JOIN metalist ON filelist.id = metalist.id WHERE subreddit IN (" + builderString + ") GROUP BY md5 ORDER BY created DESC, iid DESC LIMIT 10 OFFSET ?"
|
||||
);
|
||||
|
||||
index = 1;
|
||||
|
Loading…
Reference in New Issue
Block a user