added WebServer Wrapper

Crawl more reddit metadata
save data to sqlite
This commit is contained in:
Niklas 2018-10-10 23:58:01 +02:00
parent 6787ee6401
commit fd6f20da6a
22 changed files with 3043 additions and 44 deletions

2
.gitignore vendored
View File

@ -120,3 +120,5 @@ history.json
.classpath .classpath
*.txt *.txt
bin/ bin/
reddit_meta.db

198
html/css/simple-grid.css Normal file
View File

@ -0,0 +1,198 @@
/**
*** SIMPLE GRID
*** (C) ZACH COLE 2016
**/
@import url(https://fonts.googleapis.com/css?family=Lato:400,300,300italic,400italic,700,700italic);
/* UNIVERSAL */
/* Size the root elements to 100% of their container and remove the default
   margin/padding so the grid starts flush at the top-left corner. */
html,
body {
height: 100%;
width: 100%;
margin: 0;
padding: 0;
left: 0;
top: 0;
font-size: 100%;
}
/* POSITIONING */
/* Text-alignment helpers. */
.left {
text-align: left;
}
.right {
text-align: right;
}
/* .center also centres the box itself through automatic side margins. */
.center {
text-align: center;
margin-left: auto;
margin-right: auto;
}
.justify {
text-align: justify;
}
/* ==== GRID SYSTEM ==== */
/* Horizontally centred page wrapper; its width is widened/narrowed by the
   min-width media queries further down this file. */
.container {
width: 90%;
margin-left: auto;
margin-right: auto;
}
.row {
position: relative;
width: 100%;
}
/* Every child of a row whose class attribute STARTS with "col" is floated and
   gets a 2% gutter on each side (4% per column in total). */
.row [class^="col"] {
float: left;
margin: 0.5rem 2%;
min-height: 0.125rem;
}
/* Un-suffixed columns are full width (100% - 4% gutter) by default; they only
   receive their fractional widths inside the 45em media query below. */
.col-1,
.col-2,
.col-3,
.col-4,
.col-5,
.col-6,
.col-7,
.col-8,
.col-9,
.col-10,
.col-11,
.col-12 {
width: 96%;
}
/* "-sm" columns keep their fractional width at every viewport size.
   Width of .col-N-sm = N/12 of the row minus the 4% gutter
   (e.g. 3/12 = 25% - 4% = 21%). */
.col-1-sm {
width: 4.33%;
}
.col-2-sm {
width: 12.66%;
}
.col-3-sm {
width: 21%;
}
.col-4-sm {
width: 29.33%;
}
.col-5-sm {
width: 37.66%;
}
.col-6-sm {
width: 46%;
}
.col-7-sm {
width: 54.33%;
}
.col-8-sm {
width: 62.66%;
}
.col-9-sm {
width: 71%;
}
.col-10-sm {
width: 79.33%;
}
.col-11-sm {
width: 87.66%;
}
.col-12-sm {
width: 96%;
}
/* Clearfix: makes the row enclose its floated columns. */
.row::after {
content: "";
display: table;
clear: both;
}
/* Hidden by default; switched back to display:block by the 45em media query
   further down this file. */
.hidden-sm {
display: none;
}
/* Breakpoint 1: narrow the page wrapper from its 90% default to 80%. */
@media only screen and (min-width: 33.75em) { /* 540px */
.container {
width: 80%;
}
}
/* Breakpoint 2: un-suffixed columns switch from full width to their
   fractional width (N/12 of the row minus the 4% gutter), and .hidden-sm
   elements become visible. */
@media only screen and (min-width: 45em) { /* 720px */
.col-1 {
width: 4.33%;
}
.col-2 {
width: 12.66%;
}
.col-3 {
width: 21%;
}
.col-4 {
width: 29.33%;
}
.col-5 {
width: 37.66%;
}
.col-6 {
width: 46%;
}
.col-7 {
width: 54.33%;
}
.col-8 {
width: 62.66%;
}
.col-9 {
width: 71%;
}
.col-10 {
width: 79.33%;
}
.col-11 {
width: 87.66%;
}
.col-12 {
width: 96%;
}
.hidden-sm {
display: block;
}
}
/* Breakpoint 3: narrow the wrapper further and cap its absolute width. */
@media only screen and (min-width: 60em) { /* 960px */
.container {
width: 75%;
max-width: 60rem;
}
}

1912
html/css/style.css Normal file

File diff suppressed because it is too large Load Diff

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 31 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

View File

@ -0,0 +1,9 @@
<?xml version="1.0" encoding="utf-8"?>
<browserconfig>
<msapplication>
<tile>
<square150x150logo src="/mstile-150x150.png"/>
<TileColor>#2b5797</TileColor>
</tile>
</msapplication>
</browserconfig>

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 KiB

View File

@ -0,0 +1,32 @@
<?xml version="1.0" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 20010904//EN"
"http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
<svg version="1.0" xmlns="http://www.w3.org/2000/svg"
width="512.000000pt" height="512.000000pt" viewBox="0 0 512.000000 512.000000"
preserveAspectRatio="xMidYMid meet">
<metadata>
Created by potrace 1.11, written by Peter Selinger 2001-2013
</metadata>
<g transform="translate(0.000000,512.000000) scale(0.100000,-0.100000)"
fill="#000000" stroke="none">
<path d="M2489 5101 c-118 -65 -94 -248 36 -274 17 -4 75 -9 130 -12 432 -23
800 -142 1150 -373 516 -341 875 -887 980 -1492 24 -136 30 -228 29 -410 -5
-770 -414 -1489 -1084 -1905 -285 -176 -636 -294 -965 -323 -151 -13 -409 -7
-535 12 -97 15 -276 56 -330 76 -14 5 -65 23 -115 41 -265 94 -562 281 -783
493 -330 316 -582 774 -657 1191 -4 22 -9 50 -11 63 -2 12 -6 34 -9 50 -20
109 -27 430 -12 562 55 487 262 942 591 1298 32 34 61 62 65 62 3 0 6 -59 6
-132 -1 -145 11 -187 60 -226 78 -62 202 -23 232 73 5 17 9 179 8 361 l0 331
-29 38 c-49 64 -58 65 -421 65 -361 0 -372 -2 -419 -62 -58 -77 -28 -186 61
-224 22 -9 74 -14 159 -14 l126 0 -55 -57 c-92 -98 -108 -116 -189 -224 -275
-367 -447 -808 -495 -1269 -15 -141 -9 -443 13 -615 19 -151 92 -421 160 -590
38 -95 145 -308 187 -371 18 -28 36 -57 39 -66 9 -23 179 -246 236 -308 395
-437 892 -722 1437 -825 206 -39 509 -54 710 -35 1088 102 2009 909 2259 1980
13 58 27 123 30 145 3 22 9 58 12 80 28 184 27 548 -1 725 -116 733 -520 1361
-1135 1762 -394 257 -836 399 -1290 414 -131 4 -149 3 -181 -15z"/>
<path d="M2415 4213 c-407 -40 -759 -209 -1045 -502 -242 -248 -394 -551 -451
-901 -14 -82 -18 -369 -6 -440 4 -25 10 -61 13 -80 11 -70 54 -227 83 -302
191 -493 613 -873 1121 -1009 147 -39 234 -51 410 -54 179 -3 280 8 440 50
563 144 1031 612 1175 1175 70 271 71 557 4 825 -182 725 -848 1246 -1589
1242 -69 -1 -138 -2 -155 -4z"/>
</g>
</svg>

After

Width:  |  Height:  |  Size: 1.8 KiB

View File

@ -0,0 +1,19 @@
{
"name": "",
"short_name": "",
"icons": [
{
"src": "/android-chrome-192x192.png",
"sizes": "192x192",
"type": "image/png"
},
{
"src": "/android-chrome-512x512.png",
"sizes": "512x512",
"type": "image/png"
}
],
"theme_color": "#ffffff",
"background_color": "#ffffff",
"display": "standalone"
}

BIN
html/images/top-big.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 56 KiB

289
html/index.html Normal file
View File

@ -0,0 +1,289 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=no">
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-status-bar-style" content="black">
<meta name="apple-mobile-web-app-title" content="ImgScroll">
<link rel="apple-touch-icon" sizes="200x200" href="/images/icons/favicon-200x200.png">
<link rel="icon" type="image/png" sizes="16x16" href="/images/icons/favicon-16x16.png" >
<link rel="icon" type="image/png" sizes="200x200" href="/images/icons/favicon-200x200.png">
<link rel="icon" type="image/png" sizes="32x32" href="/images/icons/favicon-32x32.png">
<meta name="msapplication-TileColor" content="#2b5797">
<meta name="theme-color" content="#ffffff">
<link rel="manifest" href="/manifest.json">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.css"
integrity="sha256-NuCn4IvuZXdBaFKJOAcsU2Q3ZpwbdFisd5dux4jkQ5w=" crossorigin="anonymous"/>
<link rel="stylesheet" href="css/style.css">
<link rel="stylesheet" href="css/simple-grid.css">
<!-- featherlight used to be loaded from cdn.rawgit.com, which has been shut down.
     jsDelivr serves the same GitHub release files; the URL is explicit https so the
     page also works when it is not served over http(s) itself. -->
<link href="https://cdn.jsdelivr.net/gh/noelboss/featherlight@1.7.13/release/featherlight.min.css" type="text/css"
rel="stylesheet"/>
<script src="https://code.jquery.com/jquery-3.3.1.min.js"
integrity="sha256-FgpCb/KJQlLNfOu91ta32o/NMZxltwRo8QtmkMRdAu8=" crossorigin="anonymous"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery-visible/1.3.0-rc.1/jquery.visible.js"
integrity="sha256-q+RpExt086O3gG+Fns8mvpJPXbKd5fV6e/QyGGrIHlw=" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/gh/noelboss/featherlight@1.7.13/release/featherlight.min.js" type="text/javascript"
charset="utf-8"></script>
<title>ImgScroll</title>
</head>
<body>
<div id="top">
<div class="scroll-slider-container hidden">
<div class="scroll-slider-background"></div>
<div class="scroll-slider-slider">
<div class="center-center">
<div class="scroll-slider-element">
<div class="scroll-slider-completed" style="width: 30%;"></div>
</div>
</div>
</div>
<div class="scroll-slider-icons">
<div class="center-center"><i class="fa fa-pause"></i><i class="close fa fa-times"></i></div>
</div>
</div>
<div class="home-icon"><a href="/"><i class="fa fa-home"></i></a></div>
<div class="top">
<div class="top-overlay">
<div class="image-wrapper">
<img src="/images/top-big.jpg"/>
</div>
<div class="header">
<h1 class="header-single-item">ImgScroll</h1>
</div>
</div>
</div>
<div class="page-info-header" style="height: 223px;">
<div>
<div class="page-info-content">
<div class="page-info-line">
<strong>files: </strong>
<span id="fileCount"></span>
</div>
<div class="page-info-line">
<strong>subreddits: </strong>
<span id="subredditCount"></span>
</div>
<div class="page-info-line" id="subreddits">
</div>
</div>
</div>
</div>
</div>
<div class="grid">
</div>
<div class="container footer-container clearfix">
<div class="row">
<div class="col-12 footer" id="scrollload" style="margin-bottom: 2em; display: none">
Neuer Kram wird geladen! &#128591;
<br/>
<!--Indeterminate progress-bar-->
<div class="load-bar">
<div class="bar"></div>
<div class="bar"></div>
</div>
</div>
<div class="col-12 footer" id="scrollnext" style="margin-bottom: 25em; display: none">
Scrolle weiter um auf die nächste Seite zu gelangen! &#128071;
<br/>
</div>
</div>
</div>
<div id="checkme"></div>
<script>
var MAX_IMGS = 30;
/**
 * Reads a single query-string parameter from the current page URL.
 *
 * @param {string} name parameter name to look up
 * @returns {string|number|null} null when the parameter is absent; otherwise the
 *          URI-decoded value, or 0 when that value is an empty string
 */
$.urlParam = function (name) {
    var pattern = new RegExp('[\?&]' + name + '=([^&#]*)');
    var match = pattern.exec(window.location.href);
    if (match == null) {
        return null;
    }
    return decodeURI(match[1]) || 0;
};
var lastCreated = $.urlParam('o') === null ? 2147483647 : $.urlParam('o');
var waitForIMG = 0;
var loadedImages = 0;
var loading = false;
var sub = [];
var isMobile = false;
/**
 * Load callback for every media element added to the grid.
 *
 * Updates the page-level counters and, once the last outstanding element of
 * the current batch has reported in, clears the loading flag, switches the
 * footer hint and re-lays out the grid.
 *
 * @param {Element} elm the element that finished loading (currently unused)
 */
function imageLoaded(elm) {
    loadedImages += 1;
    waitForIMG -= 1;
    if (waitForIMG !== 0) {
        // Other elements of this batch are still pending.
        return;
    }
    loading = false;
    var pageIsFull = loadedImages >= MAX_IMGS;
    if (pageIsFull) {
        $("#scrollload").hide();
        $("#scrollnext").show();
    } else {
        $("#scrollload").show();
    }
    resizeAllGridItems();
}
// https://codepen.io/anon/pen/pxejxg
/**
 * Sets the number of grid rows a single item spans so that its `.content`
 * child fits, then freezes the item's current pixel size.
 *
 * All intermediate values are declared locally; the previous version leaked
 * them as implicit globals.
 *
 * @param {HTMLElement} item a grid item containing a `.content` element
 */
function resizeGridItem(item) {
    var grid = document.getElementsByClassName("grid")[0];
    var gridStyle = window.getComputedStyle(grid);
    var rowHeight = parseInt(gridStyle.getPropertyValue('grid-auto-rows'), 10);
    var rowGap = parseInt(gridStyle.getPropertyValue('grid-row-gap'), 10);
    var contentHeight = item.querySelector('.content').getBoundingClientRect().height;
    var rowSpan = Math.ceil((contentHeight + rowGap) / (rowHeight + rowGap));
    item.style.gridRowEnd = "span " + rowSpan;
    // Pin the size the browser computed for the new span.
    item.style.height = item.clientHeight + "px";
    item.style.width = item.clientWidth + "px";
}
/**
 * Recomputes the row span of every `.item` in the grid.
 *
 * The `h100` class is removed from all `.content` elements while measuring
 * and re-added afterwards. The loop variables are local; the previous version
 * leaked them as implicit globals.
 */
function resizeAllGridItems() {
    $(".content").removeClass("h100");
    var allItems = document.getElementsByClassName("item");
    for (var i = 0; i < allItems.length; i++) {
        resizeGridItem(allItems[i]);
    }
    $(".content").addClass("h100");
}
/**
 * Fetches the next batch of media for the selected subreddits from /api/get
 * and appends one grid item per entry.
 *
 * Differences to the previous version:
 *  - the `loading` flag is released when the request fails or returns no
 *    entries (it used to stay set forever, blocking all further loads);
 *  - an empty result marks the listing as exhausted so later scroll events do
 *    not keep re-requesting, and the "loading" hint is hidden;
 *  - media that fails to load is reported to imageLoaded() too, otherwise the
 *    batch would never be considered complete.
 */
function loadImages() {
    if (loadImages.exhausted) {
        // The server already told us there is nothing older to show.
        return;
    }
    loading = true;
    $.ajax({
        url: '/api/get',
        type: "GET",
        async: true,
        data: {
            'subreddits': $.urlParam('r'),
            'offset': lastCreated
        },
        success: function (data) {
            $("#scrollload").hide();
            if (!data || data.length === 0) {
                loadImages.exhausted = true;
                loading = false;
                return;
            }
            $.each(data, function (key, value) {
                var src = "/api/img?iid=" + value.iid + "&id=" + value.id;
                var isVideo = value.webm || value.mp4;
                var media;
                if (isVideo) {
                    // With <source> children the error event fires on the <source>, not on the <video>.
                    media = "<video muted autoplay loop onloadeddata='imageLoaded(this);'>"
                        + "<source src='" + src + "' type='video/" + (value.mp4 ? "mp4" : "webm") + "' onerror='imageLoaded(this.parentNode);'>"
                        + "</video>";
                } else {
                    media = "<a href='" + src + "' data-featherlight='image'>"
                        + "<img src='" + src + "' onload='imageLoaded(this);' onerror='imageLoaded(this);'>"
                        + "</a>";
                }
                var overlay = "<div class='overlay' style='" + (isMobile ? "" : "display:none") + "'>"
                    + "<a href='https://reddit.com/r/" + value.subreddit + "'>" + value.subreddit + "</a>"
                    + "<a href='https://reddit.com/r/" + value.subreddit + "/comments/" + value.id + "' target='_blank' rel='noopener' style='padding-left: 0.4em;'>"
                    + "<i class='fa fa-external-link' aria-hidden='true'></i></a>"
                    + "</div>";
                $(".grid").append("<div class='item' id='" + value.iid + "'><div class='content'>" + media + overlay + "</div></div>");
                // The last entry's timestamp becomes the offset of the next request.
                lastCreated = value.created;
                waitForIMG++;
                var item = $("#" + value.iid);
                item.hover(
                    function () {
                        $(this).find("img").addClass("hoverme");
                        $(this).find(".overlay").show();
                    },
                    function () {
                        $(this).find("img").removeClass("hoverme");
                        $(this).find(".overlay").hide();
                    }
                );
                if (isVideo) {
                    item.on("click", function (e) {
                        e.preventDefault();
                        $.featherlight($(this).find("video"));
                    });
                }
            });
        },
        error: function (jqXHR, textStatus, errorThrown) {
            console.log(textStatus, errorThrown);
            // Allow the next scroll event to retry.
            loading = false;
        }
    });
}
/*
 * Page bootstrap: wires the infinite-scroll trigger, the subreddit tag
 * toggles and the info header, then loads the first batch when a subreddit
 * selection is present in the URL.
 *
 * Differences to the previous version:
 *  - the /api/info request is asynchronous; the tag click handler is
 *    delegated from #subreddits, so it no longer has to be bound after the
 *    tags were rendered by a blocking request;
 *  - an empty "r=" parameter (for which $.urlParam returns 0) no longer
 *    throws on .split();
 *  - tag values are read with attr() because data() converts numeric-looking
 *    strings to numbers, which broke the indexOf() lookup;
 *  - tag links are built as elements so names are not parsed as HTML.
 */
$(document).ready(function () {
    isMobile = window.matchMedia("only screen and (max-width: 760px)").matches;
    // window.addEventListener("resize", resizeAllGridItems);
    $(window).scroll(function () {
        if ($('#checkme').visible(true, false, 'vertical')) {
            if (loadedImages >= MAX_IMGS) {
                window.location = "/?r=" + $.urlParam('r') + "&o=" + lastCreated;
            } else if (!loading) {
                loadImages();
            }
        }
    });

    var selected = $.urlParam('r');
    if (selected) {
        sub = String(selected).split(",");
    }

    // Toggle a subreddit in the selection and reload with the new query string.
    $("#subreddits").on('click', '.subreddit', function (e) {
        e.preventDefault();
        var subreddit = $(this).attr("data-val");
        var position = sub.indexOf(subreddit);
        if (position > -1) {
            sub.splice(position, 1);
        } else {
            sub.push(subreddit);
        }
        if (sub.length === 0) {
            window.location = "/";
        } else {
            window.location = "/?r=" + sub.join(",");
        }
    });

    $.ajax({
        url: '/api/info',
        type: "GET",
        async: true,
        data: {
            'subreddits': $.urlParam('r')
        },
        success: function (data) {
            if ($.urlParam('r') == null) {
                $("#fileCount").html(data.files + " <i>(all files in db)</i>");
            } else {
                $("#fileCount").html(data.files_subreddits + " <i>(all selected subreddits)</i>");
            }
            $("#subredditCount").html(data.subreddits_count);
            $("#subreddits").append("<strong>subreddits: </strong>");
            $.each(data.subreddits, function (key, value) {
                var tagClass = "subreddit";
                if (sub.indexOf(value) > -1) tagClass += " reddit-selected";
                var link = $("<a>", {
                    "href": "#",
                    "class": tagClass,
                    "data-val": value,
                    "text": "/r/" + value
                });
                $("#subreddits").append(link, ", ");
            });
        },
        error: function (jqXHR, textStatus, errorThrown) {
            console.log(textStatus, errorThrown);
        }
    });

    if ($.urlParam('r') != null) loadImages();
});
</script>
</body>
</html>

20
html/manifest.json Normal file
View File

@ -0,0 +1,20 @@
{
"name": "ImgScroll",
"short_name": "ImgScroll",
"icons": [
{
"src": "/images/icons/android-chrome-192x192.png",
"sizes": "192x192",
"type": "image/png"
},
{
"src": "/images/icons/android-chrome-512x512.png",
"sizes": "512x512",
"type": "image/png"
}
],
"start_url": "/",
"display": "standalone",
"background_color": "#2b5797",
"theme_color": "#2b5797"
}

10
pom.xml
View File

@ -58,6 +58,16 @@
<artifactId>httpmime</artifactId> <artifactId>httpmime</artifactId>
<version>4.3.3</version> <version>4.3.3</version>
</dependency> </dependency>
<dependency>
<groupId>org.xerial</groupId>
<artifactId>sqlite-jdbc</artifactId>
<version>3.25.2</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
<version>9.4.3.v20170317</version>
</dependency>
</dependencies> </dependencies>
<build> <build>
<plugins> <plugins>

View File

@ -1,28 +1,30 @@
package com.rarchives.ripme.ripper.rippers; package com.rarchives.ripme.ripper.rippers;
import com.rarchives.ripme.ripper.AlbumRipper;
import com.rarchives.ripme.ui.RipStatusMessage;
import com.rarchives.ripme.ui.UpdateUtils;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.RipUtils;
import com.rarchives.ripme.utils.Utils;
import org.json.JSONArray;
import org.json.JSONObject;
import org.json.JSONTokener;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import com.rarchives.ripme.ui.RipStatusMessage;
import org.json.JSONArray;
import org.json.JSONObject;
import org.json.JSONTokener;
import com.rarchives.ripme.ripper.AlbumRipper;
import com.rarchives.ripme.ui.UpdateUtils;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.RipUtils;
import com.rarchives.ripme.utils.Utils;
import org.jsoup.Jsoup;
import javax.swing.text.Document;
import javax.swing.text.Element;
public class RedditRipper extends AlbumRipper { public class RedditRipper extends AlbumRipper {
public RedditRipper(URL url) throws IOException { public RedditRipper(URL url) throws IOException {
@ -40,6 +42,13 @@ public class RedditRipper extends AlbumRipper {
private long lastRequestTime = 0; private long lastRequestTime = 0;
private static Connection connection = null;
private Statement statement;
private PreparedStatement preparedStatement;
private HashSet<String> knownIds = new HashSet<>();
private int downloadCount = 0;
private Boolean shouldAddURL() { private Boolean shouldAddURL() {
return (alreadyDownloadedUrls >= Utils.getConfigInteger("history.end_rip_after_already_seen", 1000000000) && !isThisATest()); return (alreadyDownloadedUrls >= Utils.getConfigInteger("history.end_rip_after_already_seen", 1000000000) && !isThisATest());
} }
@ -69,19 +78,40 @@ public class RedditRipper extends AlbumRipper {
@Override @Override
public void rip() throws IOException { public void rip() throws IOException {
URL jsonURL = getJsonURL(this.url); URL jsonURL = getJsonURL(this.url);
try {
// create a database connection
connection = DriverManager.getConnection("jdbc:sqlite:reddit_meta.db");
statement = connection.createStatement();
statement.setQueryTimeout(30); // set timeout to 30 sec.
ResultSet rs = statement.executeQuery("select id from metalist");
while (rs.next()) {
knownIds.add(rs.getString("id"));
}
while (true) { while (true) {
if (shouldAddURL()) { if (shouldAddURL()) {
sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_COMPLETE_HISTORY, "Already seen the last " + alreadyDownloadedUrls + " images ending rip"); sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_COMPLETE_HISTORY, "Already seen the last " + alreadyDownloadedUrls + " images ending rip");
break; break;
} }
jsonURL = getAndParseAndReturnNext(jsonURL); jsonURL = getAndParseAndReturnNext(jsonURL);
if (jsonURL == null || isThisATest() || isStopped()) { if (jsonURL == null || isThisATest() || isStopped() || downloadCount >= Utils.getConfigInteger("download.max_per_session", 50)) {
break; break;
} }
} }
waitForThreads(); waitForThreads();
} catch (SQLException e) {
LOGGER.warn("[!] Reddit Meta Database error", e);
} finally {
try {
if (connection != null)
connection.close();
} catch (SQLException e) {
LOGGER.warn("[!] Reddit Meta Database error", e);
}
}
} }
private URL getAndParseAndReturnNext(URL url) throws IOException { private URL getAndParseAndReturnNext(URL url) throws IOException {
@ -105,8 +135,7 @@ public class RedditRipper extends AlbumRipper {
String nextURLString = Utils.stripURLParameter(url.toExternalForm(), "after"); String nextURLString = Utils.stripURLParameter(url.toExternalForm(), "after");
if (nextURLString.contains("?")) { if (nextURLString.contains("?")) {
nextURLString = nextURLString.concat("&after=" + data.getString("after")); nextURLString = nextURLString.concat("&after=" + data.getString("after"));
} } else {
else {
nextURLString = nextURLString.concat("?after=" + data.getString("after")); nextURLString = nextURLString.concat("?after=" + data.getString("after"));
} }
nextURL = new URL(nextURLString); nextURL = new URL(nextURLString);
@ -158,16 +187,15 @@ public class RedditRipper extends AlbumRipper {
JSONObject data = child.getJSONObject("data"); JSONObject data = child.getJSONObject("data");
if (kind.equals("t1")) { if (kind.equals("t1")) {
// Comment // Comment
handleBody(data.getString("body"), data.getString("id"), ""); handleBody(data.getString("body"), data.getString("id"), "", data.getInt("created"), data.getInt("ups"), data.getString("subreddit"));
} } else if (kind.equals("t3")) {
else if (kind.equals("t3")) {
// post // post
if (data.getBoolean("is_self")) { if (data.getBoolean("is_self")) {
// TODO Parse self text // TODO Parse self text
handleBody(data.getString("selftext"), data.getString("id"), data.getString("title")); handleBody(data.getString("selftext"), data.getString("id"), data.getString("title"), data.getInt("created"), data.getInt("ups"), data.getString("subreddit"));
} else { } else {
// Get link // Get link
handleURL(data.getString("url"), data.getString("id"), data.getString("title")); handleURL(data.getString("url"), data.getString("id"), data.getString("title"), data.getInt("created"), data.getInt("ups"), data.getString("subreddit"));
} }
if (data.has("replies") && data.get("replies") instanceof JSONObject) { if (data.has("replies") && data.get("replies") instanceof JSONObject) {
JSONArray replies = data.getJSONObject("replies") JSONArray replies = data.getJSONObject("replies")
@ -180,7 +208,7 @@ public class RedditRipper extends AlbumRipper {
} }
} }
private void handleBody(String body, String id, String title) { private void handleBody(String body, String id, String title, int created, int ups, String sub) {
Pattern p = RipUtils.getURLRegex(); Pattern p = RipUtils.getURLRegex();
Matcher m = p.matcher(body); Matcher m = p.matcher(body);
while (m.find()) { while (m.find()) {
@ -188,7 +216,7 @@ public class RedditRipper extends AlbumRipper {
while (url.endsWith(")")) { while (url.endsWith(")")) {
url = url.substring(0, url.length() - 1); url = url.substring(0, url.length() - 1);
} }
handleURL(url, id, title); handleURL(url, id, title, created, ups, sub);
} }
} }
@ -218,13 +246,28 @@ public class RedditRipper extends AlbumRipper {
} }
private void handleURL(String theUrl, String id, String title) { private void handleURL(String theUrl, String id, String title, int created, int ups, String sub) {
URL originalURL; URL originalURL;
try { try {
originalURL = new URL(theUrl); originalURL = new URL(theUrl);
} catch (MalformedURLException e) { } catch (MalformedURLException e) {
return; return;
} }
if (ups < Utils.getConfigInteger("reddit.min_ups", 100)) {
LOGGER.info("[i] Skipping " + id + " not reached minimum upvotes: " + ups + "/" + Utils.getConfigInteger("reddit.min_ups", 100));
return;
}
downloadCount++;
if (knownIds.contains(id)) {
LOGGER.info("[i] Skipping ID " + id);
return;
}
knownIds.add(id);
String subdirectory = ""; String subdirectory = "";
if (Utils.getConfigBoolean("reddit.use_sub_dirs", true)) { if (Utils.getConfigBoolean("reddit.use_sub_dirs", true)) {
if (Utils.getConfigBoolean("album_titles.save", true)) { if (Utils.getConfigBoolean("album_titles.save", true)) {
@ -250,9 +293,8 @@ public class RedditRipper extends AlbumRipper {
String savePath = this.workingDir + File.separator; String savePath = this.workingDir + File.separator;
savePath += id + "-" + url.split("/")[3] + title + ".mp4"; savePath += id + "-" + url.split("/")[3] + title + ".mp4";
addURLToDownload(parseRedditVideoMPD(urls.get(0).toExternalForm()), new File(savePath)); addURLToDownload(parseRedditVideoMPD(urls.get(0).toExternalForm()), new File(savePath));
} } else {
else { addURLToDownload(urls.get(0), id + "-" + title, "", theUrl, null);
addURLToDownload(urls.get(0), id + title, "", theUrl, null);
} }
} else if (urls.size() > 1) { } else if (urls.size() > 1) {
for (int i = 0; i < urls.size(); i++) { for (int i = 0; i < urls.size(); i++) {
@ -263,6 +305,20 @@ public class RedditRipper extends AlbumRipper {
addURLToDownload(urls.get(i), prefix, subdirectory, theUrl, null); addURLToDownload(urls.get(i), prefix, subdirectory, theUrl, null);
} }
} }
try {
preparedStatement = connection.prepareStatement("INSERT INTO metalist values (?, ?, ?, ?, ?)");
preparedStatement.setString(1, id);
preparedStatement.setString(2, sub);
preparedStatement.setInt(3, created);
preparedStatement.setString(4, title);
preparedStatement.setString(5, "file");
preparedStatement.executeUpdate();
} catch (SQLException e) {
LOGGER.warn("[!] FAILED TO INSERT META DATA", e);
}
} }
@Override @Override

View File

@ -0,0 +1,71 @@
package de.gurkengewuerz.ripmewrapper;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.HashSet;
import java.util.List;
import java.util.TimerTask;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
/**
 * Timer task that links files found on disk to the metadata rows stored in
 * {@code reddit_meta.db}.
 * <p>
 * Each run
 * <ol>
 *   <li>collects the ids in {@code metalist} that have no row in {@code filelist},</li>
 *   <li>walks the working directory (up to 100 levels deep) and, for every file whose
 *       name starts with one of those ids followed by {@code -}, inserts a
 *       {@code filelist} row,</li>
 *   <li>deletes the {@code metalist} rows for which no file was found.</li>
 * </ol>
 * NOTE(review): step 3 also removes metadata whose file simply has not been written
 * yet (the ripper appears to insert metadata when a download is queued) - confirm that
 * this is intended.
 * <p>
 * All JDBC resources and the directory stream are closed via try-with-resources, so a
 * failing run no longer leaks the database connection.
 * <p>
 * Created by gurkengewuerz.de on 09.10.2018.
 */
public class ImageCrawler extends TimerTask {

    private static final Logger LOGGER = Logger.getLogger(ImageCrawler.class.getName());
    private static final String DB_URL = "jdbc:sqlite:reddit_meta.db";

    /**
     * Performs one crawl. Never throws: a {@link TimerTask} that throws cancels its
     * {@link java.util.Timer}, so every failure is logged instead.
     */
    @Override
    public void run() {
        Webserver.CRAWLER_IS_RUNNING = true;
        LOGGER.log(Level.INFO, ("Run Timer"));

        try (Connection connection = DriverManager.getConnection(DB_URL);
             Statement statement = connection.createStatement()) {
            statement.setQueryTimeout(30); // set timeout to 30 sec.

            HashSet<String> findIds = new HashSet<>();
            try (ResultSet rs = statement.executeQuery(
                    "SELECT metalist.id FROM metalist LEFT JOIN filelist ON metalist.id = filelist.id WHERE filelist.path IS NULL")) {
                while (rs.next()) {
                    findIds.add(rs.getString("id"));
                }
            }
            if (findIds.isEmpty()) {
                // Nothing to link and nothing to delete - skip the directory walk.
                return;
            }

            // Files.find returns a stream backed by open directory handles; it must be closed.
            List<String> pathList;
            try (java.util.stream.Stream<java.nio.file.Path> files = Files.find(
                    Paths.get(System.getProperty("user.dir")), 100,
                    (path, basicFileAttributes) -> !path.toFile().isDirectory())) {
                pathList = files.map(String::valueOf).collect(Collectors.toList());
            }

            try (PreparedStatement ps = connection.prepareStatement("INSERT INTO filelist VALUES (NULL, ?, ?)")) {
                for (String s : pathList) {
                    String fileName = s.substring(s.lastIndexOf(File.separator) + 1);
                    // The id is everything before the first '-'. indexOf instead of
                    // split("-")[0]: the latter throws for names made only of dashes.
                    int dash = fileName.indexOf('-');
                    String id = dash < 0 ? fileName : fileName.substring(0, dash);
                    if (!findIds.contains(id)) {
                        continue;
                    }

                    ps.setString(1, id);
                    ps.setString(2, s);
                    ps.executeUpdate();
                    // Only the first matching file per id is recorded.
                    findIds.remove(id);
                }
            }

            // Whatever is left has no file on disk.
            try (PreparedStatement ps = connection.prepareStatement("DELETE FROM metalist WHERE id = ?")) {
                for (String id : findIds) {
                    ps.setString(1, id);
                    ps.executeUpdate();
                }
            }
        } catch (Exception e) {
            LOGGER.log(Level.SEVERE, null, e);
        } finally {
            Webserver.CRAWLER_IS_RUNNING = false;
        }
    }
}

View File

@ -0,0 +1,72 @@
package de.gurkengewuerz.ripmewrapper;
import de.gurkengewuerz.ripmewrapper.handler.APIHandler;
import de.gurkengewuerz.ripmewrapper.handler.StaticHandler;
import org.eclipse.jetty.server.ConnectionFactory;
import org.eclipse.jetty.server.Connector;
import org.eclipse.jetty.server.Handler;
import org.eclipse.jetty.server.HttpConnectionFactory;
import org.eclipse.jetty.server.Server;
import org.eclipse.jetty.server.handler.ContextHandler;
import org.eclipse.jetty.server.handler.ContextHandlerCollection;
import org.eclipse.jetty.server.session.SessionHandler;
import java.io.File;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;
import java.util.Timer;
/**
 * Entry point of the wrapper: creates the SQLite schema if necessary, starts an
 * embedded Jetty server on port 3030 and schedules the {@link ImageCrawler}.
 * <p>
 * Created by gurkengewuerz.de on 09.10.2018.
 */
public class Webserver {

    /**
     * Set by {@link ImageCrawler} while a crawl is in progress. The crawler runs on the
     * timer thread, so the flag is {@code volatile} to make its value visible to other
     * threads.
     */
    public static volatile boolean CRAWLER_IS_RUNNING = false;

    /**
     * Prepares the database, configures the handlers and blocks until the server stops.
     *
     * @param args command line arguments (unused)
     * @throws Exception if the database cannot be prepared or the server fails to start
     */
    public static void main(String... args) throws Exception {
        // try-with-resources: the connection is released even when schema creation fails.
        try (Connection connection = DriverManager.getConnection("jdbc:sqlite:reddit_meta.db");
             Statement statement = connection.createStatement()) {
            statement.setQueryTimeout(30); // set timeout to 30 sec.

            statement.executeUpdate("CREATE TABLE IF NOT EXISTS metalist (id string, subreddit string, created integer, title string, file string)");
            statement.executeUpdate("CREATE TABLE IF NOT EXISTS filelist (iid INTEGER PRIMARY KEY AUTOINCREMENT, id string, path string)");
        }

        Server server = new Server(3030);

        // Do not advertise the Jetty version in the "Server" response header.
        for (Connector y : server.getConnectors()) {
            for (ConnectionFactory x : y.getConnectionFactories()) {
                if (x instanceof HttpConnectionFactory) {
                    ((HttpConnectionFactory) x).getHttpConfiguration().setSendServerVersion(false);
                }
            }
        }

        // Static files are served from "/".
        ContextHandler context_root = new ContextHandler("/");
        context_root.setContextPath("/");
        context_root.setHandler(new StaticHandler());

        // The JSON/image API lives under "/api", wrapped in a session handler.
        ContextHandler context_api = new ContextHandler("/api");
        context_api.setContextPath("/api");
        SessionHandler s = new SessionHandler();
        s.setHandler(new APIHandler());
        context_api.setHandler(s);

        ContextHandlerCollection contexts = new ContextHandlerCollection();
        contexts.setHandlers(new Handler[]{context_root, context_api});

        server.setHandler(contexts);

        // First crawl immediately, then every five minutes.
        Timer timer = new Timer();
        timer.schedule(new ImageCrawler(), 0, 5 * 60 * 1000);

        server.start();
        server.dumpStdErr();
        server.join();
    }

    /**
     * @return the {@code html} directory below the current working directory
     */
    public static File getStaticdir() {
        return new File(System.getProperty("user.dir") + File.separator + "html" + File.separator);
    }
}

View File

@ -0,0 +1,169 @@
package de.gurkengewuerz.ripmewrapper.handler;
import org.eclipse.jetty.server.Request;
import org.eclipse.jetty.server.handler.AbstractHandler;
import org.json.JSONArray;
import org.json.JSONObject;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* Created by gurkengewuerz.de on 09.10.2018.
*/
public class APIHandler extends AbstractHandler {
@Override
public void handle(String s, Request request, HttpServletRequest httpServletRequest, HttpServletResponse httpServletResponse) throws IOException {
try {
Connection connection = DriverManager.getConnection("jdbc:sqlite:reddit_meta.db");
Statement statement = connection.createStatement();
statement.setQueryTimeout(30); // set timeout to 30 sec.
JSONObject returnObject = null;
JSONArray returnArray = null;
request.setCharacterEncoding("UTF-8");
httpServletResponse.setCharacterEncoding("UTF-8");
if (s.equals("/get")) {
httpServletResponse.setStatus(HttpServletResponse.SC_OK);
returnArray = new JSONArray();
if (request.getParameter("subreddits") != null) {
String[] subreddits = request.getParameter("subreddits").split(",");
int created = request.getParameter("offset") == null ? Integer.MAX_VALUE : Integer.valueOf(request.getParameter("offset"));
StringBuilder builder = new StringBuilder();
for (int i = 0; i < subreddits.length; i++) {
builder.append("?,");
}
PreparedStatement ps = connection.prepareStatement(
"SELECT iid, metalist.id, subreddit, created, path FROM filelist LEFT JOIN metalist ON filelist.id = metalist.id WHERE subreddit IN (" + builder.deleteCharAt(builder.length() - 1).toString() + ") AND created < ? ORDER BY created DESC LIMIT 10"
);
int index = 1;
for (String o : subreddits) {
ps.setString(index++, o);
}
ps.setInt(index, created);
ResultSet rs = ps.executeQuery();
while (rs.next()) {
JSONObject data = new JSONObject();
data.put("iid", rs.getInt("iid"));
data.put("id", rs.getString("id"));
data.put("subreddit", rs.getString("subreddit"));
data.put("created", rs.getInt("created"));
data.put("webm", rs.getString("path").endsWith(".webm"));
data.put("mp4", rs.getString("path").endsWith(".mp4"));
returnArray.put(data);
}
rs.close();
}
} else if (s.equals("/img")) {
httpServletResponse.setStatus(HttpServletResponse.SC_OK);
PreparedStatement ps = connection.prepareStatement("SELECT path FROM filelist WHERE iid = ? AND id = ?");
ps.setString(1, request.getParameter("iid"));
ps.setString(2, request.getParameter("id"));
ResultSet rs = ps.executeQuery();
if (rs.next()) {
String path = rs.getString("path");
byte[] encoded = Files.readAllBytes(Paths.get(path));
httpServletResponse.setContentType(Files.probeContentType(Paths.get(path)));// or png or gif, etc
httpServletResponse.setContentLength(encoded.length);
httpServletResponse.getOutputStream().write(encoded);
}
request.setHandled(true);
return;
} else if (s.equals("/info")) {
httpServletResponse.setStatus(HttpServletResponse.SC_OK);
returnObject = new JSONObject();
ResultSet rs = statement.executeQuery("SELECT DISTINCT subreddit FROM metalist;");
JSONArray ja = new JSONArray();
while (rs.next()) {
ja.put(rs.getString("subreddit"));
}
returnObject.put("subreddits", ja);
returnObject.put("subreddits_count", ja.length());
rs = statement.executeQuery("SELECT COUNT(*) c_all FROM filelist;");
if (rs.next()) {
returnObject.put("files", rs.getInt("c_all"));
}
if (request.getParameter("subreddits") != null) {
String[] subreddits = request.getParameter("subreddits").split(",");
StringBuilder builder = new StringBuilder();
for (int i = 0; i < subreddits.length; i++) {
builder.append("?,");
}
PreparedStatement ps = connection.prepareStatement(
"SELECT COUNT(*) c FROM filelist LEFT JOIN metalist ON filelist.id = metalist.id WHERE subreddit IN (" + builder.deleteCharAt(builder.length() - 1).toString() + ")"
);
int index = 1;
for (String o : subreddits) {
ps.setString(index++, o);
}
rs = ps.executeQuery();
if (rs.next()) {
returnObject.put("files_subreddits", rs.getInt("c"));
}
}
} else {
httpServletResponse.setStatus(HttpServletResponse.SC_FORBIDDEN);
}
httpServletResponse.setContentType("application/json; charset=utf-8");
PrintWriter out = httpServletResponse.getWriter();
if (returnObject != null) {
out.write(returnObject.toString());
} else if (returnArray != null) {
out.write(returnArray.toString());
} else {
returnObject = new JSONObject();
returnObject.put("error", "not found");
out.write(returnObject.toString());
}
request.setHandled(true);
connection.close();
} catch (SQLException e) {
Logger.getLogger(getClass().getName()).log(Level.SEVERE, null, e);
}
}
}

View File

@ -0,0 +1,140 @@
package de.gurkengewuerz.ripmewrapper.handler;
import de.gurkengewuerz.ripmewrapper.Webserver;
import org.apache.commons.io.FilenameUtils;
import org.eclipse.jetty.server.Request;
import org.eclipse.jetty.server.handler.AbstractHandler;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Jetty handler that serves files from the directory returned by
 * {@code Webserver.getStaticdir()}.
 *
 * <p>Files whose content type starts with {@code text/} are treated as templates:
 * <ul>
 *   <li>a file that starts with {@code <protected/>} is never served directly
 *       (the 403 page is sent instead), but may still be included by other files;</li>
 *   <li>every {@code <contain file="..."/>} tag is replaced by the rendered content
 *       of the named file, resolved against the static directory.</li>
 * </ul>
 * All other files are written to the response unchanged.
 *
 * <p>Created by gurkengewuerz.de on 09.10.2018.
 */
public class StaticHandler extends AbstractHandler {

    private static final Logger LOGGER = Logger.getLogger(StaticHandler.class.getName());

    /** Marker that forbids direct delivery of a template when it is the first thing in the file. */
    private static final String PROTECTED_TAG = "<protected/>";

    /** Finds complete include tags inside a template. */
    private static final Pattern CONTAIN_TAG = Pattern.compile("<contain file=\"(.*?)\"/>");

    /** Extracts the file name (everything up to the first quote) from a single include tag. */
    private static final Pattern CONTAIN_FILE = Pattern.compile("<contain\\sfile=\"([^\"]+)");

    /** Upper bound for nested includes; stops a file that (indirectly) includes itself. */
    private static final int MAX_INCLUDE_DEPTH = 16;

    private static final String CHARSET_SUFFIX = "; charset=utf-8";

    /**
     * Returns the body of the 403 page: the rendered {@code 403.html} from the static
     * directory when it exists and is readable, otherwise a built-in fallback.
     */
    private String get403() throws IOException {
        File f = new File(Webserver.getStaticdir(), "403.html");
        if (f.exists() && f.canRead()) {
            return render(f, 0);
        }
        return "<h1>403 Forbidden</h1><br/><img src='https://http.cat/403' alt='403 forbidden'/>";
    }

    /**
     * Returns the body of the 404 page: the rendered {@code 404.html} from the static
     * directory when it exists and is readable, otherwise a built-in fallback.
     */
    private String get404() throws IOException {
        File f = new File(Webserver.getStaticdir(), "404.html");
        if (f.exists() && f.canRead()) {
            return render(f, 0);
        }
        return "<h1>404 Not Found</h1><br/><img src='https://http.cat/404' alt='404 not found'/>";
    }

    /** Reads the whole file into memory. */
    private byte[] getBytes(File f) throws IOException {
        return Files.readAllBytes(f.toPath());
    }

    /** Reads the whole file and decodes it as UTF-8. */
    private String getContent(File f) throws IOException {
        return new String(getBytes(f), StandardCharsets.UTF_8);
    }

    /**
     * Reads a template, strips every {@code <protected/>} marker and expands its includes.
     *
     * @param f     template file to render
     * @param depth current include nesting level (0 for a top-level file)
     */
    private String render(File f, int depth) throws IOException {
        return expandIncludes(getContent(f).replace(PROTECTED_TAG, ""), depth);
    }

    /**
     * Replaces each {@code <contain file="..."/>} tag in {@code data} with the rendered
     * content of the referenced file. Tags whose file is missing or unreadable are left
     * in place and logged.
     *
     * @param data  template text (protected markers already removed)
     * @param depth current include nesting level
     */
    private String expandIncludes(String data, int depth) throws IOException {
        // Collect the tags first: the text is modified while they are processed.
        List<String> tags = new ArrayList<>();
        Matcher tagMatcher = CONTAIN_TAG.matcher(data);
        while (tagMatcher.find()) {
            tags.add(tagMatcher.group());
        }
        if (tags.isEmpty()) {
            return data;
        }
        if (depth >= MAX_INCLUDE_DEPTH) {
            // Without this guard a self-including file recurses until the stack overflows.
            LOGGER.log(Level.SEVERE, "Include depth limit of " + MAX_INCLUDE_DEPTH + " reached, includes left unexpanded");
            return data;
        }
        for (String tag : tags) {
            Matcher fileMatcher = CONTAIN_FILE.matcher(tag);
            if (!fileMatcher.find()) {
                continue;
            }
            String name = fileMatcher.group(1);
            File included = new File(Webserver.getStaticdir(), name);
            if (included.exists() && included.isFile() && included.canRead()) {
                data = data.replace(tag, render(included, depth + 1));
            } else {
                LOGGER.log(Level.SEVERE, "File not exists " + name);
            }
        }
        return data;
    }

    /**
     * Tells whether {@code f}, after resolving {@code .} and {@code ..} segments, still
     * lies inside the static directory. Symbolic links are not resolved.
     */
    private static boolean isInsideStaticDir(File f) {
        Path root = new File(Webserver.getStaticdir(), ".").toPath().toAbsolutePath().normalize();
        return f.toPath().toAbsolutePath().normalize().startsWith(root);
    }

    /**
     * Determines the content type of a file. Font and SVG extensions always use the
     * fixed values below; otherwise the platform's type detection is asked. Because that
     * detection may return {@code null}, a small extension table and finally
     * {@code text/html} are used as fallbacks.
     */
    private static String contentTypeFor(File f) throws IOException {
        String extension = FilenameUtils.getExtension(f.getAbsolutePath());
        switch (extension) {
            case "eot":
                return "application/vnd.ms-fontobject";
            case "ttf":
                return "application/octet-stream";
            case "svg":
                return "image/svg+xml";
            case "woff":
                return "application/font-woff";
            case "woff2":
                return "font/woff2";
            default:
                break;
        }
        String probed = Files.probeContentType(f.toPath());
        if (probed != null) {
            return probed;
        }
        switch (extension) {
            case "css":
                return "text/css";
            case "js":
                return "application/javascript";
            case "json":
                return "application/json";
            case "png":
                return "image/png";
            case "jpg":
            case "jpeg":
                return "image/jpeg";
            case "gif":
                return "image/gif";
            case "mp4":
                return "video/mp4";
            case "webm":
                return "video/webm";
            default:
                // Covers html/htm and anything unknown, matching the handler's historic default.
                return "text/html";
        }
    }

    /** Sends an HTML page; status and content type are set before the body is written. */
    private static void sendHtml(HttpServletResponse response, int status, String body) throws IOException {
        response.setStatus(status);
        response.setContentType("text/html" + CHARSET_SUFFIX);
        response.getWriter().write(body);
    }

    /**
     * Sends a regular, readable file. Text files go through template processing (and are
     * refused with 403 when protected); everything else is written byte for byte.
     * The file is read completely before the response is touched, so a read failure
     * cannot leave a half-written response behind.
     */
    private void serveFile(File f, HttpServletResponse response) throws IOException {
        String mime = contentTypeFor(f);
        if (mime.startsWith("text/")) {
            String raw = getContent(f);
            if (raw.startsWith(PROTECTED_TAG)) {
                sendHtml(response, HttpServletResponse.SC_FORBIDDEN, get403());
                return;
            }
            String body = expandIncludes(raw.replace(PROTECTED_TAG, ""), 0);
            response.setStatus(HttpServletResponse.SC_OK);
            response.setContentType(mime + CHARSET_SUFFIX);
            response.getWriter().write(body);
        } else {
            byte[] body = getBytes(f);
            response.setStatus(HttpServletResponse.SC_OK);
            response.setContentType(mime + CHARSET_SUFFIX);
            response.getOutputStream().write(body);
        }
    }

    /**
     * Serves the file addressed by the request target {@code s}, relative to the static
     * directory. A directory is answered with its {@code index.html} when present.
     * Responds with 404 for missing files and with 403 for directories without index,
     * unreadable files, protected templates, paths outside the static directory and
     * any failure while preparing the response. The request is always marked as handled.
     */
    @Override
    public void handle(String s, Request request, HttpServletRequest httpServletRequest, HttpServletResponse httpServletResponse) throws IOException, ServletException {
        LOGGER.log(Level.INFO, "Request by " + request.getRemoteAddr() + "@" + s);
        try {
            File f = new File(Webserver.getStaticdir(), s);
            if (!isInsideStaticDir(f)) {
                sendHtml(httpServletResponse, HttpServletResponse.SC_FORBIDDEN, get403());
            } else {
                if (f.isDirectory()) {
                    File index = new File(f, "index.html");
                    if (index.exists()) {
                        f = index;
                    }
                }
                if (!f.exists()) {
                    sendHtml(httpServletResponse, HttpServletResponse.SC_NOT_FOUND, get404());
                } else if (f.isFile() && f.canRead()) {
                    serveFile(f, httpServletResponse);
                } else {
                    sendHtml(httpServletResponse, HttpServletResponse.SC_FORBIDDEN, get403());
                }
            }
        } catch (Exception ex) {
            LOGGER.log(Level.WARNING, "Failed to serve " + s, ex);
            if (!httpServletResponse.isCommitted()) {
                // reset() also clears the getWriter()/getOutputStream() state, so the
                // error page can be written regardless of which one was used before.
                httpServletResponse.reset();
                sendHtml(httpServletResponse, HttpServletResponse.SC_FORBIDDEN, get403());
            }
        }
        request.setHandled(true);
    }
}