Removed duplicate files by using an MD5 hash
This commit is contained in:
parent
d179895318
commit
6d22a5f579
@ -1,5 +1,7 @@
|
|||||||
package de.gurkengewuerz.ripmewrapper;
|
package de.gurkengewuerz.ripmewrapper;
|
||||||
|
|
||||||
|
import org.apache.commons.codec.digest.DigestUtils;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
@ -44,13 +46,19 @@ public class ImageCrawler extends TimerTask {
|
|||||||
}).map(String::valueOf)
|
}).map(String::valueOf)
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
PreparedStatement ps = connection.prepareStatement("INSERT INTO filelist VALUES (NULL, ?, ?)");
|
PreparedStatement ps = connection.prepareStatement("INSERT INTO filelist VALUES (NULL, ?, ?, ?)");
|
||||||
HashSet<String> foundIds = new HashSet<>();
|
HashSet<String> foundIds = new HashSet<>();
|
||||||
for (String s : pathList) {
|
for (String s : pathList) {
|
||||||
String id = s.substring(s.lastIndexOf(File.separator) + 1).split("-")[0];
|
String id = s.substring(s.lastIndexOf(File.separator) + 1).split("-")[0];
|
||||||
if (!findIds.contains(id)) continue;
|
if (!findIds.contains(id)) continue;
|
||||||
|
|
||||||
|
FileInputStream fis = new FileInputStream(new File(s));
|
||||||
|
String md5 = DigestUtils.md5Hex(fis);
|
||||||
|
fis.close();
|
||||||
|
|
||||||
ps.setString(1, id);
|
ps.setString(1, id);
|
||||||
ps.setString(2, s);
|
ps.setString(2, s);
|
||||||
|
ps.setString(3, md5);
|
||||||
ps.executeUpdate();
|
ps.executeUpdate();
|
||||||
foundIds.add(id);
|
foundIds.add(id);
|
||||||
}
|
}
|
||||||
|
@ -30,7 +30,7 @@ public class Webserver {
|
|||||||
statement.setQueryTimeout(30); // set timeout to 30 sec.
|
statement.setQueryTimeout(30); // set timeout to 30 sec.
|
||||||
|
|
||||||
statement.executeUpdate("CREATE TABLE IF NOT EXISTS metalist (id string, subreddit string, created integer, title string, file string)");
|
statement.executeUpdate("CREATE TABLE IF NOT EXISTS metalist (id string, subreddit string, created integer, title string, file string)");
|
||||||
statement.executeUpdate("CREATE TABLE IF NOT EXISTS filelist (iid INTEGER PRIMARY KEY AUTOINCREMENT, id string, path string)");
|
statement.executeUpdate("CREATE TABLE IF NOT EXISTS filelist (iid INTEGER PRIMARY KEY AUTOINCREMENT, id string, path string, md5 string)");
|
||||||
connection.close();
|
connection.close();
|
||||||
|
|
||||||
|
|
||||||
|
@ -61,7 +61,7 @@ public class APIHandler extends AbstractHandler {
|
|||||||
|
|
||||||
if (offset != -1) {
|
if (offset != -1) {
|
||||||
PreparedStatement psPre = connection.prepareStatement(
|
PreparedStatement psPre = connection.prepareStatement(
|
||||||
"SELECT iid FROM filelist LEFT JOIN metalist ON filelist.id = metalist.id WHERE subreddit IN (" + builderString + ") ORDER BY created DESC, iid DESC"
|
"SELECT iid FROM filelist LEFT JOIN metalist ON filelist.id = metalist.id WHERE subreddit IN (" + builderString + ") GROUP BY md5 ORDER BY created DESC, iid DESC"
|
||||||
);
|
);
|
||||||
|
|
||||||
for (String o : subreddits) {
|
for (String o : subreddits) {
|
||||||
@ -82,7 +82,7 @@ public class APIHandler extends AbstractHandler {
|
|||||||
// ----------------
|
// ----------------
|
||||||
|
|
||||||
PreparedStatement ps = connection.prepareStatement(
|
PreparedStatement ps = connection.prepareStatement(
|
||||||
"SELECT iid, metalist.id, subreddit, created, path FROM filelist LEFT JOIN metalist ON filelist.id = metalist.id WHERE subreddit IN (" + builderString + ") ORDER BY created DESC, iid DESC LIMIT 10 OFFSET ?"
|
"SELECT iid, metalist.id, subreddit, created, path FROM filelist LEFT JOIN metalist ON filelist.id = metalist.id WHERE subreddit IN (" + builderString + ") GROUP BY md5 ORDER BY created DESC, iid DESC LIMIT 10 OFFSET ?"
|
||||||
);
|
);
|
||||||
|
|
||||||
index = 1;
|
index = 1;
|
||||||
|
Loading…
Reference in New Issue
Block a user