commit c070f154f4

.github/ISSUE_TEMPLATE.md (9 changes, vendored)

@@ -1,12 +1,3 @@
-<!--
-We've moved! If you are not already, please consider opening your issue at the following link:
-https://github.com/RipMeApp/ripme/issues/new
-
-If this is a bug, please fill out the information below.
-Please include any additional information that would help us fix the bug.
-If this is a feature request or other type of issue, provide whatever information you feel is appropriate.
--->
-
 * Ripme version:
 * Java version: <!-- (output of `java -version`) -->
 * Operating system: <!-- (if Windows, output of `ver` or `winver`) -->

.github/PULL_REQUEST_TEMPLATE.md (7 changes, vendored)

@@ -1,10 +1,3 @@
-<!--
-We've moved! If you are not already, please consider opening your pull request here:
-https://github.com/RipMeApp/ripme/
-
-To help us verify your change, please fill out the information below.
--->
-
 # Category
 
 This change is exactly one of the following (please change `[ ]` to `[x]`) to indicate which:

.gitignore (114 changes, vendored)

@@ -1,17 +1,121 @@
+# Created by https://www.gitignore.io/api/java,linux,macos,maven,windows
+
+### Java ###
+# Compiled class file
+*.class
+
+# Log file
+*.log
+
+# BlueJ files
+*.ctxt
+
+# Mobile Tools for Java (J2ME)
+.mtj.tmp/
+
+# Package Files #
+*.jar
+*.war
+*.ear
+*.zip
+*.tar.gz
+*.rar
+
+# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
+hs_err_pid*
+
+### Linux ###
+*~
+
+# temporary files which can be created if a process still has a handle open of a deleted file
+.fuse_hidden*
+
+# KDE directory preferences
+.directory
+
+# Linux trash folder which might appear on any partition or disk
+.Trash-*
+
+# .nfs files are created when an open file is removed but is still being accessed
+.nfs*
+
+### macOS ###
+*.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+### Maven ###
 target/
-.DS_Store
+pom.xml.tag
+pom.xml.releaseBackup
+pom.xml.versionsBackup
+pom.xml.next
+release.properties
+dependency-reduced-pom.xml
+buildNumber.properties
+.mvn/timing.properties
+
+# Avoid ignoring Maven wrapper jar file (.jar files are usually ignored)
+!/.mvn/wrapper/maven-wrapper.jar
+
+### Windows ###
+# Windows thumbnail cache files
+Thumbs.db
+ehthumbs.db
+ehthumbs_vista.db
+
+# Folder config file
+Desktop.ini
+
+# Recycle Bin used on file shares
+$RECYCLE.BIN/
+
+# Windows Installer files
+*.cab
+*.msi
+*.msm
+*.msp
+
+# Windows shortcuts
+*.lnk
+
+### IDEs ###
+.vscode
+.idea
+.project
+
+### Ripme ###
 ripme.log
 rips/
 .history
 ripme.jar.update
 *.swp
-ripme.jar
-rip.properties
+*.properties
 history.json
-.idea
 *.iml
 .settings/
 .classpath
 *.txt
 bin/
-.vscode/

.travis.yml

@@ -1,3 +1,6 @@
 language: java
 jdk:
-  - oraclejdk7
+  - oraclejdk8
+  - openjdk8
+after_success:
+  - mvn clean test jacoco:report coveralls:report

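Both Travis JDK entries now target Java 8, matching the `<source>`/`<target>` bump in pom.xml later in this commit, and the new `after_success` step re-runs the test suite under the JaCoCo agent and publishes the coverage report to Coveralls via the two plugins added there.
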
.vscode/settings.json (4 changes, vendored)

@@ -2,7 +2,9 @@
     "files.exclude": {
         "target/**": true,
         "**/.git": true,
-        "**/.DS_Store": true
+        "**/.DS_Store": true,
+        "**/*.class": true,
+        "**/rips/**": true
     },
     "java.configuration.updateBuildConfiguration": "automatic"
 }

CONTRIBUTING.md

@@ -7,7 +7,16 @@ You can now find the latest code, issues, and releases at [RipMeApp/ripme](https
 Please be polite and supportive to all users and contributors. Please be inclusive of everyone regardless of race, religion, gender identity or expression, sexual preference, or tools and platform preferences. Please be helpful and stick to the engineering facts, and avoid expressing unhelpful or off-topic opinions.
 
-Many of the sites we deal with contain NSFW (Not Safe For Work) content. Please assume any link you see is NSFW unless tagged otherwise -- i.e., SFW (Safe For Work). Please tag all links you post with either (NSFW) or (SFW) to be considerate to others who may not be browsing this repo in private.
+# NSFW Content
+
+**Please tag NSFW links (links to sites with adult content) with "(NSFW)"!**
+
+Many of the sites we deal with contain NSFW (Not Safe For Work) content. Please assume any link you see is NSFW unless tagged otherwise -- i.e., SFW (Safe For Work). Please tag all links you post with either "(NSFW)" or "(SFW)" to be considerate to others who may not be browsing this repo in private or who are not interested in NSFW content.
+
+There is a helpful plugin called uMatrix available for [Firefox](https://addons.mozilla.org/en-US/firefox/addon/umatrix/) and [Chrome](https://chrome.google.com/webstore/detail/umatrix/ogfcmafjalglgifnmanfmnieipoejdcf) which allows you to block certain types of content like media and scripts.
+If you're not sure if a site might contain NSFW images or media, and you are in mixed company but want to develop a new ripper, you can block downloading images and media in the * (all sites) scope and allow requests for specific domains you trust as you go.
+Being able to browse the HTML is usually the most important part of developing or fixing a ripper, so it is not necessarily important to actually see the images load.
 
 
 # Priorities

@@ -68,13 +77,14 @@ Good style is a tool for communicating your intent with other developers of the
 Some recommendations:
 
 * Above all, be consistent!
-* Spaces, not tabs.
+* Spaces, not tabs. Indents should be 4 spaces.
 * We prefer "Egyptian brackets" (in `if`, `for`, `while`, `switch`, etc.):
     * `if (...) {`
     * `} else if (...) {`
     * `} else {`
     * `}`
-* Constants in `UPPER_SNAKE_CASE`
+* Note the spacing convention above for control flow constructs (a single space on the outside of each paren)
+* Constants in `UPPER_SNAKE_CASE` a.k.a. `CONST_CASE`
 * Class names in `PascalCase` a.k.a. `UpperCamelCase`
 * Variable names in `camelCase` a.k.a. `lowerCamelCase`
 * Do not use Hungarian notation

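To make the recommendations concrete, here is a short hypothetical snippet (invented for illustration, not taken from the RipMe codebase) that follows all of them at once:

```java
public class ExampleDownloader {
    // Constant in UPPER_SNAKE_CASE; class name in PascalCase
    private static final int MAX_RETRIES = 3;

    public int countAttempts(int itemCount) {
        int attemptsMade = 0; // variable in camelCase
        // Egyptian brackets, 4-space indents, a single space outside each paren
        for (int i = 0; i < itemCount; i++) {
            if (i == 0) {
                attemptsMade++;
            } else if (i < MAX_RETRIES) {
                attemptsMade += 2;
            } else {
                attemptsMade += 1;
            }
        }
        return attemptsMade;
    }
}
```
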
README.md (32 changes)

@@ -1,9 +1,22 @@
-# RipMe
+# RipMe [![Licensed under the MIT License](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/RipMeApp/ripme/blob/master/LICENSE.txt) [![Join the chat at https://gitter.im/RipMeApp/Lobby](https://badges.gitter.im/RipMeApp/Lobby.svg)](https://gitter.im/RipMeApp/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![Subreddit](https://img.shields.io/badge/discuss-on%20reddit-blue.svg)](https://www.reddit.com/r/ripme/)
 
-[![Build Status](https://travis-ci.org/4pr0n/ripme.svg?branch=master)](https://travis-ci.org/4pr0n/ripme)
-[![Join the chat at https://gitter.im/RipMeApp/Lobby](https://badges.gitter.im/RipMeApp/Lobby.svg)](https://gitter.im/RipMeApp/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
+[![Build Status](https://travis-ci.org/RipMeApp/ripme.svg?branch=master)](https://travis-ci.org/RipMeApp/ripme)
+[![Coverage Status](https://coveralls.io/repos/github/RipMeApp/ripme/badge.svg?branch=master)](https://coveralls.io/github/RipMeApp/ripme?branch=master)
 
-Album ripper for various websites. Runs on your computer. Requires Java 1.6
+# Contribute
 
+RipMe is maintained with ♥️ and in our limited free time by **[@MetaPrime](https://github.com/metaprime)** and **[@cyian-1756](https://github.com/cyian-1756)**. If you'd like to contribute but aren't good with code, help keep us happy with a small contribution!
+
+[![Tip with PayPal](https://img.shields.io/badge/PayPal-Buy_us...-lightgrey.svg)](https://www.paypal.me/ripmeapp)
+[![Tip with PayPal](https://img.shields.io/badge/coffee-%245-green.svg)](https://www.paypal.com/paypalme/ripmeapp/send?amount=5.00&currencyCode=USD&locale.x=en_US&country.x=US)
+[![Tip with PayPal](https://img.shields.io/badge/beer-%2410-yellow.svg)](https://www.paypal.com/paypalme/ripmeapp/send?amount=10.00&currencyCode=USD&locale.x=en_US&country.x=US)
+[![Tip with PayPal](https://img.shields.io/badge/lunch-%2420-orange.svg)](https://www.paypal.com/paypalme/ripmeapp/send?amount=20.00&currencyCode=USD&locale.x=en_US&country.x=US)
+[![Tip with PayPal](https://img.shields.io/badge/dinner-%2450-red.svg)](https://www.paypal.com/paypalme/ripmeapp/send?amount=50.00&currencyCode=USD&locale.x=en_US&country.x=US)
+[![Tip with PayPal](https://img.shields.io/badge/custom_amount-...-lightgrey.svg)](https://www.paypal.me/ripmeapp)
+
+# About
+
+RipMe is an album ripper for various websites. Runs on your computer. Requires Java 8.
+
 ![Screenshot](http://i.imgur.com/kWzhsIu.png)
 

@@ -73,14 +86,3 @@ mvn test
 Please note that some tests may fail as sites change and our rippers become out of date.
 Start by building and testing a released version of RipMe
 and then ensure that any changes you make do not cause more tests to break.
-
-# Dependencies
-
-* junit-3.8.1
-* jsoup-1.7.3
-* json-20140107
-* apache-commons-configuration-1.7
-* log4j-1.2.17
-* commons-cli-1.2
-* commons-io-1.3.2
-* httpcomponents-4.3.3

docs/options.md (40 changes, new file)

@@ -0,0 +1,40 @@
+file.overwrite | bool | If true ripme will overwrite existing files rather than skip them
+
+clipboard.autorip | bool | If true ripme will try to download any links in the clip board
+
+error.skip404 | bool | Don't retry on 404 errors
+
+download.save_order| bool | If true ripme will prefix each downloaded file with a number in the order the file was download
+
+auto.update | bool | If true ripme will auto-update every time it's started
+
+play.sound | bool | If true ripme will play a sound every time a rip finishes
+
+download.show_popup| bool | TODO figure out what this is for
+
+log.save | bool | If true ripme will save it's logs
+
+urls_only.save | bool | If true ripme will save all urls to a text file and download no files
+
+album_titles.save | bool | Currently does nothing
+
+prefer.mp4 | bool | Prefer mp4 when downloading a video that has more than 1 format
+
+download.timeout | int | File download timeout (in milliseconds)
+
+page.timeout | int | Page download timeout (in milliseconds)
+
+download.max_size | int | Maximum size of downloaded files in bytes
+
+threads.size | int | The number of threads to use
+
+twitter.auth | String | Twitter API key (Base64'd)
+
+tumblr.auth | String | Tumblr API key
+
+log.level | String | The debug log level (Example: Log level: Debug)
+
+gw.api | String | TODO figure out what this is for
+
+twitter.max_requests | int | TODO figure out what this is for

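For illustration, a hypothetical configuration sketch using keys from the table above. The file name `rip.properties` is inferred from the entry formerly in .gitignore, and every value here is made up, so treat the whole snippet as an assumption rather than a documented default set:

```properties
# rip.properties (hypothetical example, not shipped defaults)
file.overwrite = false
download.save_order = true
download.timeout = 60000
page.timeout = 5000
threads.size = 5
prefer.mp4 = true
```
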
nbactions.xml (deleted)

@@ -1,17 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<actions>
-    <action>
-        <actionName>run</actionName>
-        <packagings>
-            <packaging>jar</packaging>
-        </packagings>
-        <goals>
-            <goal>process-classes</goal>
-            <goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
-        </goals>
-        <properties>
-            <exec.args>-classpath %classpath com.rarchives.ripme.App</exec.args>
-            <exec.executable>java</exec.executable>
-        </properties>
-    </action>
-</actions>

patch.ps1 (53 changes, deleted)

@@ -1,53 +0,0 @@
-Param (
-    [Parameter(Mandatory=$True)]
-    [string]$message
-)
-
-# This script will:
-# - read current version
-# - increment patch version
-# - update version in a few places
-# - insert new line in ripme.json with $message
-
-$ripmeJson = (Get-Content "ripme.json") -join "`n" | ConvertFrom-Json
-$currentVersion = $ripmeJson.latestVersion
-
-Write-Output (("Current version", $currentVersion) -join ' ')
-
-$versionFields = $currentVersion.split('.')
-$patchCurr = [int]($versionFields[2])
-$patchNext = $patchCurr + 1
-$majorMinor = $versionFields[0..1]
-$majorMinorPatch = $majorMinor + $patchNext
-$nextVersion = $majorMinorPatch -join '.'
-
-Write-Output (("Updating to", $nextVersion) -join ' ')
-
-$substExpr = "s/${currentVersion}/${nextVersion}/"
-sed src/main/java/com/rarchives/ripme/ui/UpdateUtils.java -i -e "${substExpr}"
-git grep "DEFAULT_VERSION.*${nextVersion}" src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
-
-$substExpr = "s/\`"latestVersion\`" : \`"${currentVersion}\`"/\`"latestVersion\`" : \`"${nextVersion}\`"/"
-sed ripme.json -i -e "${substExpr}"
-git grep "latestVersion" ripme.json
-
-$substExpr = "s/<version>${currentVersion}/<version>${nextVersion}/"
-sed pom.xml -i -e "${substExpr}"
-git grep "<version>${nextVersion}" pom.xml
-
-$commitMessage = "${nextVersion}: ${message}"
-
-$ripmeJsonLines = Get-Content "ripme.json"
-$ripmeJsonHead = $ripmeJsonLines[0..2]
-$ripmeJsonRest = $ripmeJsonLines[3..$ripmeJsonLines.length]
-$changelogLine = " `"${commitMessage}`","
-$updatedLines = $ripmeJsonHead + $changelogLine + $ripmeJsonRest + ""
-$outputContent = $updatedLines -join "`n"
-
-$outputPath = (Resolve-Path .\ripme.json).Path
-$Utf8NoBomEncoding = New-Object System.Text.UTF8Encoding $False
-[System.IO.File]::WriteAllText($outputPath, $outputContent, $Utf8NoBomEncoding)
-
-git add -u
-git commit -m $commitMessage
-git tag $nextVersion

patch.py (56 changes, new file)

@@ -0,0 +1,56 @@
+import json
+import subprocess
+
+# This script will:
+# - read current version
+# - increment patch version
+# - update version in a few places
+# - insert new line in ripme.json with message
+
+message = input('message: ')
+
+with open('ripme.json') as dataFile:
+    ripmeJson = json.load(dataFile)
+currentVersion = ripmeJson["latestVersion"]
+
+print ('Current version ' + currentVersion)
+
+versionFields = currentVersion.split('.')
+patchCur = int(versionFields[2])
+patchNext = patchCur + 1
+majorMinor = versionFields[:2]
+majorMinor.append(str(patchNext))
+nextVersion = '.'.join(majorMinor)
+
+print ('Updating to ' + nextVersion)
+
+substrExpr = 's/' + currentVersion + '/' + nextVersion + '/'
+subprocess.call(['sed', '-i', '-e', substrExpr, 'src/main/java/com/rarchives/ripme/ui/UpdateUtils.java'])
+subprocess.call(['git', 'grep', 'DEFAULT_VERSION.*' + nextVersion,
+                 'src/main/java/com/rarchives/ripme/ui/UpdateUtils.java'])
+
+substrExpr = 's/\\\"latestVersion\\\": \\\"' + currentVersion + '\\\"/\\\"latestVersion\\\": \\\"' +\
+             nextVersion + '\\\"/'
+subprocess.call(['sed', '-i', '-e', substrExpr, 'ripme.json'])
+subprocess.call(['git', 'grep', 'latestVersion', 'ripme.json'])
+
+substrExpr = 's/<version>' + currentVersion + '/<version>' + nextVersion + '/'
+subprocess.call(['sed', '-i', '-e', substrExpr, 'pom.xml'])
+subprocess.call(['git', 'grep', '<version>' + nextVersion + '</version>', 'pom.xml'])
+
+commitMessage = nextVersion + ': ' + message
+changeLogLine = ' \"' + commitMessage + '\",\n'
+
+dataFile = open("ripme.json", "r")
+ripmeJsonLines = dataFile.readlines()
+ripmeJsonLines.insert(3, changeLogLine)
+outputContent = ''.join(ripmeJsonLines)
+dataFile.close()
+
+dataFile = open("ripme.json", "w")
+dataFile.write(outputContent)
+dataFile.close()
+
+subprocess.call(['git', 'add', '-u'])
+subprocess.call(['git', 'commit', '-m', commitMessage])
+subprocess.call(['git', 'tag', nextVersion])

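A few usage notes on the script above: it is interactive and, because it reads a free-form string with `input()`, it assumes Python 3 (Python 2's `input()` would try to evaluate the text); it also shells out to `sed` and `git`, so both must be on `PATH`. The `ripmeJsonLines.insert(3, changeLogLine)` call places the new entry as the fourth line of ripme.json, which only works because `{`, `"latestVersion": ...`, and `"changeList": [` occupy the first three lines, exactly the layout visible in the ripme.json diff below.
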
pom.xml (25 changes)

@@ -4,7 +4,7 @@
   <groupId>com.rarchives.ripme</groupId>
   <artifactId>ripme</artifactId>
   <packaging>jar</packaging>
-  <version>1.5.7</version>
+  <version>1.7.12</version>
   <name>ripme</name>
   <url>http://rip.rarchives.com</url>
   <properties>
@@ -84,10 +84,29 @@
         <artifactId>maven-compiler-plugin</artifactId>
         <version>3.1</version>
         <configuration>
-          <source>1.6</source>
-          <target>1.6</target>
+          <source>1.8</source>
+          <target>1.8</target>
         </configuration>
       </plugin>
+      <plugin>
+        <groupId>org.eluder.coveralls</groupId>
+        <artifactId>coveralls-maven-plugin</artifactId>
+        <version>4.3.0</version>
+      </plugin>
+      <plugin>
+        <!-- At time of writing: JaCoCo is (allegedly) the only coverage report generator that supports Java 8 -->
+        <groupId>org.jacoco</groupId>
+        <artifactId>jacoco-maven-plugin</artifactId>
+        <version>0.7.6.201602180812</version>
+        <executions>
+          <execution>
+            <id>prepare-agent</id>
+            <goals>
+              <goal>prepare-agent</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
     </plugins>
   </build>
 </project>

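With these plugins in place, the coverage half of the Travis pipeline should be reproducible locally as `mvn clean test jacoco:report`; the final `coveralls:report` step normally also needs a Coveralls repo token when run outside CI (an assumption about the plugin's usual setup, not something configured in this commit).
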
ripme.json (335 changes)

@@ -1,152 +1,187 @@
 {
-  "latestVersion" : "1.5.7",
-  "changeList" : [
+  "latestVersion": "1.7.12",
+  "changeList": [
+    "1.7.12: Instagram ripper no longer 403s on certain images",
+    "1.7.11: Added gwarchives support to the cheveretoRipper; Gfycat Tests & Fix for bad reddit submissions; instagram ripper can now be made to skip videos",
+    "1.7.10: Added basic pornpics.com ripper; Fixed hentai.cafe regex",
+    "1.7.9: FuraffinityRipper can now rip non-public albums; Added 2 new api keys, ripper can now download raw images from tumblr; Erome ripper now matchs links without the www; Tumblr ripper now tells the user if it hits the rate limit",
+    "1.7.8: Forced https for tumblr image links; Fixed imgur album filenames; SankakuComplexRipper now downloads full sized images; Added dribbble.com ripper; Added comfirm button for clearing history",
+    "1.7.7: Fixed E621 Ripper; Added unit test for zizki.com; Added unit test for Xbooru.com; Updated reddit useragent",
+    "1.7.6: Added OglafRipper",
+    "1.7.5: Improve WordpressComicRipper; update to a modern User Agent",
+    "1.7.4: Fix WordpressComicRipper konradokonski.com/wiory; Fix CheveretoRipper hushpix.com by adding consent cookie",
+    "1.7.3: Improved Aerisdies and Imagearn folders; fixed tapas.io; XhamsterRipper now uses mobile site; InstagramRipper slideshows under user profiles",
+    "1.7.2: InstagramRipper: Added support for ripping individual posts",
+    "1.7.1: Fix WordpressComicRipper's ripper for freeadultcomix.com; FuraffinityRipper can now rip public albums",
+    "1.7.0: Improved Webtoons folders; Added code coverage with Coveralls.io and improved unit tests; removed rippers for dead sites",
+    "1.6.13: Added Instagram tags; improved Instagram and Pichunter regexes",
+    "1.6.12: Fix InstagramRipper with timestamps; Pichunter galleries support; logging improvements",
+    "1.6.11: Added pichunter.com ripper; Improved Instagram filenames; added tehyiffgallery ripper; Fixed xchan ripper; Fixed chanRipper folders",
+    "1.6.10: Added viewcomic ripper; Fixed webtoons malformed url error message; Fixed chan ripper thread title; Fixed Modelmayhem ripper",
+    "1.6.9: Added support for imgur /t/ albums; Added portable mode; Unit tests no longer fail if run twice; Formating fixes",
+    "1.6.8: code clean up; ripme can now remeber and skip already downloaded images",
+    "1.6.7: Fixed instagram ripper",
+    "1.6.6: Fixed 8muses ripper",
+    "1.6.5: Imgbox ripper now downloads full sized image from galleries",
+    "1.6.4: Added webtoons ripper",
+    "1.6.3: Window is now resizable; Added Porncomix.info ripper; Fixed imgbox ripper; Added hentai2read ripper",
+    "1.6.2: Fixed shesfreaky.com ripper; Fixed imgbox ripper; Fixed Xhamster video ripping",
+    "1.6.1: Rolled E621Ripper back from 1.6.0 to the 1.5.15 version",
+    "1.6.0: Updated to java 8; Some code cleanup",
+    "1.5.15: Added Hbrowse.com ripper; 8muses ripper now can rip from all album types",
+    "1.5.14: Myhentaicomics ripper no longer tries to download ads; Added hentai.cafe ripper; Fixed sankakucomplex ripper",
+    "1.5.13: InstagramRipper: fixed minor bug",
+    "1.5.12: Make tray icon optional; work around window positioning bug on Windows.",
+    "1.5.11: Added -v, --version flag",
+    "1.5.10: Added ripper for cfake.com; Fixed nhentai album naming",
+    "1.5.9: InstagramRipper now downloads full sized images; ImagefapRipper Now adds GID to folder name",
+    "1.5.8: Fixed 8muses ripper",
     "1.5.7: Added EromeRipper",
     "1.5.6: Fixed ImagearnRipper; Fixed SmuttyRipper",
     "1.5.5: Wordpress comic ripper Updates",
     "1.5.4: Added Luscious.net ripper",
     "1.5.3: Eroshare links redirect to Eroshae; add AerisdiesRipper",
     "1.5.2: Fix Imgur titles; fix xhamster (new URL format); fixed Instagram ripping cropped pictures",
     "1.5.1: Ensure update mechanism is working correctly.",
     "1.5.0: Change 'home' repo from 4pr0n/RipMe to RipMeApp/RipMe",
     "1.4.21: Added Chevereto ripper (hushpix.com, tag-fox.com)",
     "1.4.20: EroshareRipper can now rip user profiles",
     "1.4.19: WordpressComicRipper supports more rippers; improvements to Instagram and code quality",
     "1.4.18: Fix video rippers (broken in 1.4.14)",
     "1.4.17: MyHentaiComics improvements",
     "1.4.16: Fix Eightmuses; Add Instagram album support",
     "1.4.15: Fixed DeviantArt Ripper",
     "1.4.14: Improvements to ChanRipper (rip external links), MyHentaiComics, and Twitter (video and albums)",
     "1.4.13: Fixed furaffinity ripper.",
     "1.4.12: Fixed Crash on Win10 CU; Fixed SSL error on xHamster.",
     "1.4.11: Instagram: fixed cropped images issue.",
     "1.4.10: Add WordPressComicRipper (various sites supported)",
     "1.4.9: Fixed HentaiFoundry ripper",
     "1.4.8: Added Jagodibuja comics ripper",
     "1.4.7: Fixed NewsFilter, XHamster; added TheChiveRipper",
     "1.4.6: Eroshare: get album names; Imgur: improve grabbing album name.",
     "1.4.5: SinnerComics: Added work around for naming bug",
     "1.4.4: Added SinnerComics, MyHentaiComics rippers; improve E621 ripper.",
     "1.4.3: Add missing subdomain for 4chan; fix ehentai, 8muses; add zizki ripper.",
     "1.4.2: Added nhentai ripper.",
     "1.4.1: Fixed Imgbox: correctly downloads full-size images.",
     "1.4.0: Fixed update mechanism. Some improvements to Imgur, etc.",
     "1.3.0: Fix Instagram, Tumblr, xHamster, 4chan, 8muses. Some new features.",
     "1.2.13: Hotfix for imgur album rips",
     "1.2.12: 500px gallery/subgallery support",
     "1.2.11: Deviant fav subfolders, and reddituploads support",
     "1.2.10: Imgur /gallery/ images fix",
     "1.2.9: Imgur 10-image fix, original twitter sizes",
     "1.2.8: Option to prefer MP4 over GIF for imgur",
     "1.2.7: Fix 500px ripper to fetch NSFW images",
     "1.2.6: Fix 500px ripper",
     "1.2.5: Descriptions are optional, minor imgur fixes",
     "1.2.4: Fix instagram ripper",
     "1.2.3: Fix xhamster videos, option to remove/clear Queue",
     "1.2.2: Fix imagefap ripper",
     "1.2.1: Gfycat Fix, lots of changes pushed",
     "1.2.0: Fix imagebam, 8muses. Remember queue items",
     "1.1.9: Hotfix for new imgur album layout",
     "1.1.8: Fix for failed reddit rips",
     "1.1.7: Imagefap fix, corrupt history crash fix, deviantart 403 fix",
     "1.1.6: History error handling and drawchan support",
     "1.1.5: Fix imagefap and 8muses rippers",
     "1.1.4: Fix DeviantArt 403 errors",
     "1.1.3: Fix Check Selected in History",
     "1.1.2: Check/Uncheck history by right-clicking the history",
     "1.1.1: Gfycat/Reddit fix",
     "1.1.0: Revamped History, Cheeby fix",
     "1.0.93: Reddit fix, gfycat fix, video download fix",
     "1.0.92: Anon-ib fix, cheeby fix, vid.me ripper",
     "1.0.91: Fix for anon-ib, minus rippers",
     "1.0.90: Hide error message when ripping valid album",
     "1.0.89: Fix fapproved ripper",
     "1.0.88: Fix imgbox ripper",
     "1.0.87: Chan ripper update, Finebox update, Motherless video ripper",
     "1.0.86: Fix for imagefap albums larger than 1k images",
     "1.0.85: Fix Modelmayhem ripper",
     "1.0.84: Ripper can resume after being stopped",
     "1.0.83: Fix 2dgalleries ripper",
     "1.0.82: Photobucket ripper fix, Clipboard Autorip toggle",
     "1.0.81: Tumblr/seenive fixes, queue system, better history",
     "1.0.80: Fix Butttoucher ripper",
     "1.0.79: Fix cheeby to rip all images",
     "1.0.78: BCFakes ripper",
     "1.0.77: Cheeby ripper, status in title, various fixes",
     "1.0.76: Option to only save URLs, Taptastic ripper",
     "1.0.75: 500px ripper",
     "1.0.74: Videarn video ripper",
     "1.0.73: Datw.in ripper",
     "1.0.72: Support for DeviantArt favourites",
     "1.0.71: Fuskator ripper",
     "1.0.70: Various improvements. Xhamster, Cliphunter video rippers",
     "1.0.69: Gfycat video ripper, instgram username fix",
     "1.0.68: Imagevenue and hentai-foundry rippers",
     "1.0.67: Support for external tumblr domains",
     "1.0.66: GirlsOfDesire ripper",
     "1.0.65: Vidd.me video ripper",
     "1.0.64: Imagebam ripper",
     "1.0.63: Hopefully fixing freezing issue while re-ripping all albums",
     "1.0.62: Imgur album directories named after album title",
     "1.0.61: Logs are optional, defaults to not save logs",
     "1.0.60: Fix for crazy directory creation bug",
     "1.0.59: Show when albums can be ripped immediately",
     "1.0.58: Logs are saved to album directory, ehentai fix",
     "1.0.57: Nfsfw ripper",
     "1.0.56: Fix for imgur rips",
     "1.0.55: Ehentai ripper bypasses content warning",
     "1.0.54: Mediacru.sh ripper, may require a Java update",
     "1.0.53: 8Muses ripper fix, can rip subalbums",
     "1.0.52: Imgbox ripper, popup notifications are optional",
     "1.0.51: Deviantart rips full-size NSFW images",
     "1.0.50: Smutty.com ripper",
     "1.0.49: More Ehentai ripper fixes",
     "1.0.48: Imagestash.org /tag/ ripper, ehentai fixes",
     "1.0.47: Vidble ripper, right-click popupmenu on text",
     "1.0.46: Auto-indexing filenames (001_, 002_, etc) is now optional",
     "1.0.45: Imagefap /gallery/, Motherless search terms, reddit ripper fix",
     "1.0.44: Deviantart rips full-size images",
     "1.0.43: Added Modelmayhem ripper",
     "1.0.42: Added Drawcrowd ripper, bug fix for large albums",
     "1.0.41: Fix for multi-page Deviantart galleries, secure Flickr URLs",
     "1.0.40: Flickr bug fix and groups support",
     "1.0.39: Various fixes for Ehentai and Motherless",
     "1.0.38: Ehentai ripper, 4chan .webm support, optional audio confirmations",
     "1.0.37: Added Vine.co and Supertangas rippers",
     "1.0.36: Added semi-working Gifyo ripper",
     "1.0.35: Fixed i.rarchives ripper, delete empty directories",
     "1.0.34: Added fapproved and anonib rippers",
     "1.0.33: Imgur ripper fixes",
     "1.0.32: Fix for directories with special characters",
     "1.0.31: Fix for large imgur albums",
     "1.0.30: Added Minus ripper",
     "1.0.29: Various fixes for tumblr, flickr, 4chan",
     "1.0.28: Added vk.com video ripper(s)",
     "1.0.27: Added flickr ripper",
     "1.0.26: Ability to rerip history from command-line",
     "1.0.25: Added photobucket ripper",
     "1.0.24: Fixed possible deadlock issue while re-ripping albums",
     "1.0.23: Added teenplanet, irarchives, and butttoucher support",
     "1.0.22: Fixed huge bug where ripper did not work at all for any sites",
     "1.0.21: Ability to rip user account images on imgur",
     "1.0.20: Video ripper support: pornhub, youporn, beeg, xvideos",
     "1.0.19: Fix imgur account ripper",
     "1.0.18: Button icons, kinkyshare.com ripper",
     "1.0.17: *chan ripper, imgur titles in filenames",
     "1.0.16: Fix bug with instagram usernames containing _ or -",
     "1.0.15: Auto-updater should be compatible with Windows",
     "1.0.14: Fix twitter account names with _ or -",
     "1.0.13: Auto-updater is more verbose, hopefully works",
     "1.0.12: Fixed clipboard autorip bug",
     "1.0.11: 404 images are markead as errored",
     "1.0.10: Taskbar notifications when rips start",
     "1.0.9: More-verbose completion, UI tweaks",
     "1.0.8: Auto-update functionality",
     "1.0.7: Clipboard Autorip and tray icons",
     "1.0.6: Support imgur.com/r/subreddit albums",
     "1.0.5: Persistent configuration, small bug fixes",
     "1.0.4: Fixed spaces-in-directory bug",
     "1.0.3: Added VK.com ripper",
     "1.0.1: Added auto-update functionality"
   ]
 }

src/main/java/com/rarchives/ripme/App.java

@@ -1,7 +1,6 @@
 package com.rarchives.ripme;
 
 import java.io.File;
-import java.io.FilenameFilter;
 import java.io.IOException;
 import java.io.BufferedReader;
 import java.io.FileReader;
@@ -35,31 +34,51 @@ import com.rarchives.ripme.utils.Utils;
  */
 public class App {
 
-    public static Logger logger;
+    public static final Logger logger;
     private static final History HISTORY = new History();
 
-    public static void main(String[] args) throws MalformedURLException {
+    static {
+        //initialize logger
         Utils.configureLogger();
+        logger = Logger.getLogger(App.class);
+    }
+
+    public static void main(String[] args) throws MalformedURLException {
+        CommandLine cl = getArgs(args);
+        if (args.length > 0 && cl.hasOption('v')){
+            logger.error(UpdateUtils.getThisJarVersion());
+            System.exit(0);
+        }
+
         System.setProperty("apple.laf.useScreenMenuBar", "true");
         System.setProperty("com.apple.mrj.application.apple.menu.about.name", "RipMe");
-        logger = Logger.getLogger(App.class);
         logger.info("Initialized ripme v" + UpdateUtils.getThisJarVersion());
 
         if (args.length > 0) {
+            // CLI Mode
            handleArguments(args);
         } else {
+            // GUI Mode
             MainWindow mw = new MainWindow();
             SwingUtilities.invokeLater(mw);
         }
     }
-
-    public static void rip(URL url) throws Exception {
+    /**
+     * Creates an abstract ripper and instructs it to rip.
+     * @param url URL to be ripped
+     * @throws Exception
+     */
+    private static void rip(URL url) throws Exception {
         AbstractRipper ripper = AbstractRipper.getRipper(url);
         ripper.setup();
         ripper.rip();
     }
 
-    public static void handleArguments(String[] args) {
+    /**
+     * For dealing with command-line arguments.
+     * @param args Array of Command-line arguments
+     */
+    private static void handleArguments(String[] args) {
         CommandLine cl = getArgs(args);
         if (cl.hasOption('h')) {
             HelpFormatter hf = new HelpFormatter();
@@ -98,8 +117,8 @@ public class App {
         }
         if (cl.hasOption('R')) {
             loadHistory();
-            if (HISTORY.toList().size() == 0) {
-                System.err.println("There are no history entries to re-rip. Rip some albums first");
+            if (HISTORY.toList().isEmpty()) {
+                logger.error("There are no history entries to re-rip. Rip some albums first");
                 System.exit(-1);
             }
             int added = 0;
@@ -122,7 +141,7 @@ public class App {
             }
         }
         if (added == 0) {
-            System.err.println("No history entries have been 'Checked'\n" +
+            logger.error("No history entries have been 'Checked'\n" +
                 "Check an entry by clicking the checkbox to the right of the URL or Right-click a URL to check/uncheck all items");
             System.exit(-1);
         }
@@ -134,7 +153,7 @@ public class App {
             Utils.setConfigBoolean("download.save_order", false);
         }
         if ((cl.hasOption('d'))&&(cl.hasOption('D'))) {
-            System.err.println("\nCannot specify '-d' and '-D' simultaneously");
+            logger.error("\nCannot specify '-d' and '-D' simultaneously");
             System.exit(-1);
         }
         if (cl.hasOption('l')) {
@@ -162,14 +181,18 @@ public class App {
         }
     }
 
-    // this function will attempt to rip the provided url
-    public static void ripURL(String targetURL, boolean saveConfig) {
+    /**
+     * Attempt to rip targetURL.
+     * @param targetURL URL to rip
+     * @param saveConfig Whether or not you want to save the config (?)
+     */
+    private static void ripURL(String targetURL, boolean saveConfig) {
         try {
             URL url = new URL(targetURL);
             rip(url);
             List<String> history = Utils.getConfigList("download.history");
-            if (!history.contains(url.toExternalForm())) {
-                history.add(url.toExternalForm());
+            if (!history.contains(url.toExternalForm())) {//if you haven't already downloaded the file before
+                history.add(url.toExternalForm());//add it to history so you won't have to redownload
                 Utils.setConfigList("download.history", Arrays.asList(history.toArray()));
                 if (saveConfig) {
                     Utils.saveConfig();
@@ -184,7 +207,11 @@ public class App {
         }
     }
 
-    public static Options getOptions() {
+    /**
+     * Creates an Options object, returns it.
+     * @return Returns all acceptable command-line options.
+     */
+    private static Options getOptions() {
         Options opts = new Options();
         opts.addOption("h", "help", false, "Print the help");
         opts.addOption("u", "url", true, "URL of album to rip");
@@ -198,31 +225,39 @@ public class App {
         opts.addOption("l", "ripsdirectory", true, "Rips Directory (Default: ./rips)");
         opts.addOption("n", "no-prop-file", false, "Do not create properties file.");
         opts.addOption("f", "urls-file", true, "Rip URLs from a file.");
+        opts.addOption("v", "version", false, "Show current version");
         return opts;
     }
 
-    public static CommandLine getArgs(String[] args) {
+    /**
+     * Tries to parse commandline arguments.
+     * @param args Array of commandline arguments.
+     * @return CommandLine object containing arguments.
+     */
+    private static CommandLine getArgs(String[] args) {
         BasicParser parser = new BasicParser();
         try {
-            CommandLine cl = parser.parse(getOptions(), args, false);
-            return cl;
+            return parser.parse(getOptions(), args, false);
         } catch (ParseException e) {
             logger.error("[!] Error while parsing command-line arguments: " + Arrays.toString(args), e);
             System.exit(-1);
             return null;
         }
     }
 
+    /**
+     * Loads history from history file into memory.
+     */
     private static void loadHistory() {
-        File historyFile = new File("history.json");
+        File historyFile = new File(Utils.getConfigDir() + File.separator + "history.json");
         HISTORY.clear();
         if (historyFile.exists()) {
             try {
-                logger.info("Loading history from history.json");
-                HISTORY.fromFile("history.json");
+                logger.info("Loading history from " + historyFile.getCanonicalPath());
+                HISTORY.fromFile(historyFile.getCanonicalPath());
             } catch (IOException e) {
                 logger.error("Failed to load history from file " + historyFile, e);
-                System.out.println(
+                logger.warn(
                     "RipMe failed to load the history file at " + historyFile.getAbsolutePath() + "\n\n" +
                     "Error: " + e.getMessage() + "\n\n" +
                     "Closing RipMe will automatically overwrite the contents of this file,\n" +
@@ -234,12 +269,7 @@ public class App {
         if (HISTORY.toList().size() == 0) {
             // Loaded from config, still no entries.
             // Guess rip history based on rip folder
-            String[] dirs = Utils.getWorkingDirectory().list(new FilenameFilter() {
-                @Override
-                public boolean accept(File dir, String file) {
-                    return new File(dir.getAbsolutePath() + File.separator + file).isDirectory();
-                }
-            });
+            String[] dirs = Utils.getWorkingDirectory().list((dir, file) -> new File(dir.getAbsolutePath() + File.separator + file).isDirectory());
             for (String dir : dirs) {
                 String url = RipUtils.urlFromDirectoryName(dir);
                 if (url != null) {

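Two changes above deserve a note. The new `-v`/`--version` flag makes startup print the version and exit; with the released jar the invocation would look like `java -jar ripme.jar -v` (the jar name is inferred from .gitignore, so treat the exact command as an assumption). And the anonymous `FilenameFilter` in `loadHistory()` is replaced with a behaviorally identical Java 8 lambda, which is precisely what the `<source>1.8</source>` bump in pom.xml enables.
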
@ -17,27 +17,27 @@ import com.rarchives.ripme.utils.Utils;
|
|||||||
*/
|
*/
|
||||||
public abstract class AbstractHTMLRipper extends AlbumRipper {
|
public abstract class AbstractHTMLRipper extends AlbumRipper {
|
||||||
|
|
||||||
public AbstractHTMLRipper(URL url) throws IOException {
|
protected AbstractHTMLRipper(URL url) throws IOException {
|
||||||
super(url);
|
super(url);
|
||||||
}
|
}
|
||||||
|
|
||||||
public abstract String getDomain();
|
protected abstract String getDomain();
|
||||||
public abstract String getHost();
|
public abstract String getHost();
|
||||||
|
|
-    public abstract Document getFirstPage() throws IOException;
+    protected abstract Document getFirstPage() throws IOException;
     public Document getNextPage(Document doc) throws IOException {
         return null;
     }
-    public abstract List<String> getURLsFromPage(Document page);
+    protected abstract List<String> getURLsFromPage(Document page);
-    public List<String> getDescriptionsFromPage(Document doc) throws IOException {
+    protected List<String> getDescriptionsFromPage(Document doc) throws IOException {
         throw new IOException("getDescriptionsFromPage not implemented"); // Do I do this or make an abstract function?
     }
-    public abstract void downloadURL(URL url, int index);
+    protected abstract void downloadURL(URL url, int index);
-    public DownloadThreadPool getThreadPool() {
+    protected DownloadThreadPool getThreadPool() {
         return null;
     }

-    public boolean keepSortOrder() {
+    protected boolean keepSortOrder() {
         return true;
     }

@ -50,13 +50,13 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
     public URL sanitizeURL(URL url) throws MalformedURLException {
         return url;
     }
-    public boolean hasDescriptionSupport() {
+    protected boolean hasDescriptionSupport() {
         return false;
     }
-    public String[] getDescription(String url,Document page) throws IOException {
+    protected String[] getDescription(String url, Document page) throws IOException {
         throw new IOException("getDescription not implemented"); // Do I do this or make an abstract function?
     }
-    public int descSleepTime() {
+    protected int descSleepTime() {
         return 100;
     }
     @Override
@ -140,7 +140,15 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
         }
         waitForThreads();
     }
-    public String fileNameFromURL(URL url) {
+    /**
+     * Gets the file name from the URL
+     * @param url
+     *      URL that you want to get the filename from
+     * @return
+     *      Filename of the URL
+     */
+    private String fileNameFromURL(URL url) {
         String saveAs = url.toExternalForm();
         if (saveAs.substring(saveAs.length() - 1) == "/") { saveAs = saveAs.substring(0,saveAs.length() - 1) ;}
         saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1);
@ -150,11 +158,25 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
         if (saveAs.indexOf(':') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf(':')); }
         return saveAs;
     }
+    /**
+     *
+     * @param url
+     *      Target URL
+     * @param subdirectory
+     *      Path to subdirectory where you want to save it
+     * @param text
+     *      Text you want to save
+     * @param index
+     *      Index in something like an album
+     * @return
+     *      True if ripped successfully
+     *      False if failed
+     */
     public boolean saveText(URL url, String subdirectory, String text, int index) {
         String saveAs = fileNameFromURL(url);
         return saveText(url,subdirectory,text,index,saveAs);
     }
-    public boolean saveText(URL url, String subdirectory, String text, int index, String fileName) {
+    private boolean saveText(URL url, String subdirectory, String text, int index, String fileName) {
         // Not the best for some cases, like FurAffinity. Overridden there.
         try {
             stopCheck();
@ -189,7 +211,15 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
         }
         return true;
     }
-    public String getPrefix(int index) {
+    /**
+     * Gets prefix based on where in the index it is
+     * @param index
+     *      The index in question
+     * @return
+     *      Returns prefix for a file. (?)
+     */
+    protected String getPrefix(int index) {
         String prefix = "";
         if (keepSortOrder() && Utils.getConfigBoolean("download.save_order", true)) {
             prefix = String.format("%03d_", index);
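Note: the unchanged context line in `fileNameFromURL` compares strings with `==` (`saveAs.substring(saveAs.length() - 1) == "/"`), which in Java tests reference identity, so the trailing-slash branch effectively never fires. A minimal corrected sketch (not part of this commit):

```java
// Sketch only: value-based check with endsWith instead of reference equality.
private String fileNameFromURL(URL url) {
    String saveAs = url.toExternalForm();
    if (saveAs.endsWith("/")) {
        saveAs = saveAs.substring(0, saveAs.length() - 1);
    }
    return saveAs.substring(saveAs.lastIndexOf('/') + 1);
}
```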
@ -15,24 +15,25 @@ import com.rarchives.ripme.utils.Utils;
  */
 public abstract class AbstractJSONRipper extends AlbumRipper {

-    public AbstractJSONRipper(URL url) throws IOException {
+    protected AbstractJSONRipper(URL url) throws IOException {
         super(url);
     }

-    public abstract String getDomain();
+    protected abstract String getDomain();
+    @Override
     public abstract String getHost();

-    public abstract JSONObject getFirstPage() throws IOException;
+    protected abstract JSONObject getFirstPage() throws IOException;
-    public JSONObject getNextPage(JSONObject doc) throws IOException {
+    protected JSONObject getNextPage(JSONObject doc) throws IOException {
         throw new IOException("getNextPage not implemented");
     }
-    public abstract List<String> getURLsFromJSON(JSONObject json);
+    protected abstract List<String> getURLsFromJSON(JSONObject json);
-    public abstract void downloadURL(URL url, int index);
+    protected abstract void downloadURL(URL url, int index);
-    public DownloadThreadPool getThreadPool() {
+    private DownloadThreadPool getThreadPool() {
         return null;
     }

-    public boolean keepSortOrder() {
+    protected boolean keepSortOrder() {
         return true;
     }

@ -96,7 +97,7 @@ public abstract class AbstractJSONRipper extends AlbumRipper {
         waitForThreads();
     }

-    public String getPrefix(int index) {
+    protected String getPrefix(int index) {
         String prefix = "";
         if (keepSortOrder() && Utils.getConfigBoolean("download.save_order", true)) {
             prefix = String.format("%03d_", index);
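Note: both abstract rippers build file prefixes the same way. A quick illustration of the zero-padded format, assuming the default `download.save_order` setting:

```java
// Illustration only: with download.save_order enabled, index 7 yields "007_",
// so files downloaded from an album sort in their original order on disk.
String prefix = String.format("%03d_", 7);  // "007_"
```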
|
@ -1,8 +1,7 @@
|
|||||||
package com.rarchives.ripme.ripper;
|
package com.rarchives.ripme.ripper;
|
||||||
|
|
||||||
import java.awt.Desktop;
|
import java.awt.Desktop;
|
||||||
import java.io.File;
|
import java.io.*;
|
||||||
import java.io.IOException;
|
|
||||||
import java.lang.reflect.Constructor;
|
import java.lang.reflect.Constructor;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
@ -21,21 +20,25 @@ import com.rarchives.ripme.ui.RipStatusMessage;
|
|||||||
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
||||||
import com.rarchives.ripme.utils.Utils;
|
import com.rarchives.ripme.utils.Utils;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.Scanner;
|
||||||
|
|
||||||
public abstract class AbstractRipper
|
public abstract class AbstractRipper
|
||||||
extends Observable
|
extends Observable
|
||||||
implements RipperInterface, Runnable {
|
implements RipperInterface, Runnable {
|
||||||
|
|
||||||
protected static final Logger logger = Logger.getLogger(AbstractRipper.class);
|
protected static final Logger logger = Logger.getLogger(AbstractRipper.class);
|
||||||
|
private final String URLHistoryFile = Utils.getURLHistoryFile();
|
||||||
|
|
||||||
public static final String USER_AGENT =
|
public static final String USER_AGENT =
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:36.0) Gecko/20100101 Firefox/36.0";
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36";
|
||||||
|
|
||||||
protected URL url;
|
protected URL url;
|
||||||
protected File workingDir;
|
protected File workingDir;
|
||||||
protected DownloadThreadPool threadPool;
|
DownloadThreadPool threadPool;
|
||||||
protected RipStatusHandler observer = null;
|
RipStatusHandler observer = null;
|
||||||
|
|
||||||
protected boolean completed = true;
|
private boolean completed = true;
|
||||||
|
|
||||||
public abstract void rip() throws IOException;
|
public abstract void rip() throws IOException;
|
||||||
public abstract String getHost();
|
public abstract String getHost();
|
||||||
@ -56,6 +59,56 @@ public abstract class AbstractRipper
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void writeDownloadedURL(String downloadedURL) throws IOException {
|
||||||
|
BufferedWriter bw = null;
|
||||||
|
FileWriter fw = null;
|
||||||
|
try {
|
||||||
|
File file = new File(URLHistoryFile);
|
||||||
|
// if file doesnt exists, then create it
|
||||||
|
if (!file.exists()) {
|
||||||
|
file.createNewFile();
|
||||||
|
}
|
||||||
|
fw = new FileWriter(file.getAbsoluteFile(), true);
|
||||||
|
bw = new BufferedWriter(fw);
|
||||||
|
bw.write(downloadedURL);
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
} finally {
|
||||||
|
try {
|
||||||
|
if (bw != null)
|
||||||
|
bw.close();
|
||||||
|
if (fw != null)
|
||||||
|
fw.close();
|
||||||
|
} catch (IOException ex) {
|
||||||
|
ex.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks to see if Ripme has already downloaded a URL
|
||||||
|
* @param url URL to check if downloaded
|
||||||
|
* @return
|
||||||
|
* Returns true if previously downloaded.
|
||||||
|
* Returns false if not yet downloaded.
|
||||||
|
*/
|
||||||
|
private boolean hasDownloadedURL(String url) {
|
||||||
|
File file = new File(URLHistoryFile);
|
||||||
|
try {
|
||||||
|
Scanner scanner = new Scanner(file);
|
||||||
|
while (scanner.hasNextLine()) {
|
||||||
|
final String lineFromFile = scanner.nextLine();
|
||||||
|
if (lineFromFile.equals(url)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Ensures inheriting ripper can rip this URL, raises exception if not.
|
* Ensures inheriting ripper can rip this URL, raises exception if not.
|
||||||
* Otherwise initializes working directory and thread pool.
|
* Otherwise initializes working directory and thread pool.
|
||||||
@ -72,6 +125,15 @@ public abstract class AbstractRipper
|
|||||||
this.url = sanitizeURL(url);
|
this.url = sanitizeURL(url);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets ripper's:
|
||||||
|
* Working directory
|
||||||
|
* Logger (for debugging)
|
||||||
|
* FileAppender
|
||||||
|
* Threadpool
|
||||||
|
* @throws IOException
|
||||||
|
* Always be prepared.
|
||||||
|
*/
|
||||||
public void setup() throws IOException {
|
public void setup() throws IOException {
|
||||||
setWorkingDir(this.url);
|
setWorkingDir(this.url);
|
||||||
Logger rootLogger = Logger.getRootLogger();
|
Logger rootLogger = Logger.getRootLogger();
|
||||||
@ -109,10 +171,34 @@ public abstract class AbstractRipper
|
|||||||
* @param cookies
|
* @param cookies
|
||||||
* The cookies to send to the server while downloading this file.
|
* The cookies to send to the server while downloading this file.
|
||||||
* @return
|
* @return
|
||||||
|
* True if downloaded successfully
|
||||||
|
* False if failed to download
|
||||||
*/
|
*/
|
||||||
public abstract boolean addURLToDownload(URL url, File saveAs, String referrer, Map<String,String> cookies);
|
protected abstract boolean addURLToDownload(URL url, File saveAs, String referrer, Map<String, String> cookies);
|
||||||
|
|
||||||
public boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map<String,String> cookies) {
|
/**
|
||||||
|
* Queues image to be downloaded and saved.
|
||||||
|
* @param url
|
||||||
|
* URL of the file
|
||||||
|
* @param prefix
|
||||||
|
* Prefix for the downloaded file
|
||||||
|
* @param subdirectory
|
||||||
|
* Path to get to desired directory from working directory
|
||||||
|
* @param referrer
|
||||||
|
* The HTTP referrer to use while downloading this file.
|
||||||
|
* @param cookies
|
||||||
|
* The cookies to send to the server while downloading this file.
|
||||||
|
* @return
|
||||||
|
* True if downloaded successfully
|
||||||
|
* False if failed to download
|
||||||
|
*/
|
||||||
|
protected boolean addURLToDownload(URL url, String prefix, String subdirectory, String referrer, Map<String, String> cookies) {
|
||||||
|
if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) {
|
||||||
|
if (hasDownloadedURL(url.toExternalForm())) {
|
||||||
|
sendUpdate(STATUS.DOWNLOAD_WARN, "Already downloaded " + url.toExternalForm());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
stopCheck();
|
stopCheck();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
@ -131,6 +217,7 @@ public abstract class AbstractRipper
|
|||||||
if (!subdirectory.equals("")) {
|
if (!subdirectory.equals("")) {
|
||||||
subdirectory = File.separator + subdirectory;
|
subdirectory = File.separator + subdirectory;
|
||||||
}
|
}
|
||||||
|
prefix = Utils.filesystemSanitized(prefix);
|
||||||
saveFileAs = new File(
|
saveFileAs = new File(
|
||||||
workingDir.getCanonicalPath()
|
workingDir.getCanonicalPath()
|
||||||
+ subdirectory
|
+ subdirectory
|
||||||
@ -146,6 +233,13 @@ public abstract class AbstractRipper
|
|||||||
logger.info("[+] Creating directory: " + Utils.removeCWD(saveFileAs.getParent()));
|
logger.info("[+] Creating directory: " + Utils.removeCWD(saveFileAs.getParent()));
|
||||||
saveFileAs.getParentFile().mkdirs();
|
saveFileAs.getParentFile().mkdirs();
|
||||||
}
|
}
|
||||||
|
if (Utils.getConfigBoolean("remember.url_history", true) && !isThisATest()) {
|
||||||
|
try {
|
||||||
|
writeDownloadedURL(url.toExternalForm() + "\n");
|
||||||
|
} catch (IOException e) {
|
||||||
|
logger.debug("Unable to write URL history file");
|
||||||
|
}
|
||||||
|
}
|
||||||
return addURLToDownload(url, saveFileAs, referrer, cookies);
|
return addURLToDownload(url, saveFileAs, referrer, cookies);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -159,7 +253,7 @@ public abstract class AbstractRipper
|
|||||||
* Sub-directory of the working directory to save the images to.
|
* Sub-directory of the working directory to save the images to.
|
||||||
* @return True on success, flase on failure.
|
* @return True on success, flase on failure.
|
||||||
*/
|
*/
|
||||||
public boolean addURLToDownload(URL url, String prefix, String subdirectory) {
|
protected boolean addURLToDownload(URL url, String prefix, String subdirectory) {
|
||||||
return addURLToDownload(url, prefix, subdirectory, null, null);
|
return addURLToDownload(url, prefix, subdirectory, null, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -172,7 +266,7 @@ public abstract class AbstractRipper
|
|||||||
* Text to append to saved filename.
|
* Text to append to saved filename.
|
||||||
* @return True on success, flase on failure.
|
* @return True on success, flase on failure.
|
||||||
*/
|
*/
|
||||||
public boolean addURLToDownload(URL url, String prefix) {
|
protected boolean addURLToDownload(URL url, String prefix) {
|
||||||
// Use empty subdirectory
|
// Use empty subdirectory
|
||||||
return addURLToDownload(url, prefix, "");
|
return addURLToDownload(url, prefix, "");
|
||||||
}
|
}
|
||||||
@ -223,14 +317,14 @@ public abstract class AbstractRipper
|
|||||||
/**
|
/**
|
||||||
* @return Number of files downloaded.
|
* @return Number of files downloaded.
|
||||||
*/
|
*/
|
||||||
public int getCount() {
|
int getCount() {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Notifies observers and updates state if all files have been ripped.
|
* Notifies observers and updates state if all files have been ripped.
|
||||||
*/
|
*/
|
||||||
protected void checkIfComplete() {
|
void checkIfComplete() {
|
||||||
if (observer == null) {
|
if (observer == null) {
|
||||||
logger.debug("observer is null");
|
logger.debug("observer is null");
|
||||||
return;
|
return;
|
||||||
@ -262,6 +356,11 @@ public abstract class AbstractRipper
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets URL
|
||||||
|
* @return
|
||||||
|
* Returns URL that wants to be downloaded.
|
||||||
|
*/
|
||||||
public URL getURL() {
|
public URL getURL() {
|
||||||
return url;
|
return url;
|
||||||
}
|
}
|
||||||
@ -275,8 +374,20 @@ public abstract class AbstractRipper
|
|||||||
return workingDir;
|
return workingDir;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public abstract void setWorkingDir(URL url) throws IOException;
|
public abstract void setWorkingDir(URL url) throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param url
|
||||||
|
* The URL you want to get the title of.
|
||||||
|
* @return
|
||||||
|
* host_URLid
|
||||||
|
* e.g. (for a reddit post)
|
||||||
|
* reddit_post_7mg2ur
|
||||||
|
* @throws MalformedURLException
|
||||||
|
* If any of those damned URLs gets malformed.
|
||||||
|
*/
|
||||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||||
return getHost() + "_" + getGID(url);
|
return getHost() + "_" + getGID(url);
|
||||||
}
|
}
|
||||||
@ -320,10 +431,10 @@ public abstract class AbstractRipper
|
|||||||
* @throws Exception
|
* @throws Exception
|
||||||
*/
|
*/
|
||||||
public static List<Constructor<?>> getRipperConstructors(String pkg) throws Exception {
|
public static List<Constructor<?>> getRipperConstructors(String pkg) throws Exception {
|
||||||
List<Constructor<?>> constructors = new ArrayList<Constructor<?>>();
|
List<Constructor<?>> constructors = new ArrayList<>();
|
||||||
for (Class<?> clazz : Utils.getClassesForPackage(pkg)) {
|
for (Class<?> clazz : Utils.getClassesForPackage(pkg)) {
|
||||||
if (AbstractRipper.class.isAssignableFrom(clazz)) {
|
if (AbstractRipper.class.isAssignableFrom(clazz)) {
|
||||||
constructors.add( (Constructor<?>) clazz.getConstructor(URL.class) );
|
constructors.add(clazz.getConstructor(URL.class));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return constructors;
|
return constructors;
|
||||||
@ -331,7 +442,7 @@ public abstract class AbstractRipper
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Sends an update message to the relevant observer(s) on this ripper.
|
* Sends an update message to the relevant observer(s) on this ripper.
|
||||||
* @param status
|
* @param status
|
||||||
* @param message
|
* @param message
|
||||||
*/
|
*/
|
||||||
public void sendUpdate(STATUS status, Object message) {
|
public void sendUpdate(STATUS status, Object message) {
|
||||||
@ -340,9 +451,17 @@ public abstract class AbstractRipper
|
|||||||
}
|
}
|
||||||
observer.update(this, new RipStatusMessage(status, message));
|
observer.update(this, new RipStatusMessage(status, message));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the completion percentage.
|
||||||
|
* @return
|
||||||
|
* Percentage complete
|
||||||
|
*/
|
||||||
public abstract int getCompletionPercentage();
|
public abstract int getCompletionPercentage();
|
||||||
|
/**
|
||||||
|
* @return
|
||||||
|
* Text for status
|
||||||
|
*/
|
||||||
public abstract String getStatusText();
|
public abstract String getStatusText();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -355,10 +474,6 @@ public abstract class AbstractRipper
|
|||||||
logger.error("Got exception while running ripper:", e);
|
logger.error("Got exception while running ripper:", e);
|
||||||
waitForThreads();
|
waitForThreads();
|
||||||
sendUpdate(STATUS.RIP_ERRORED, "HTTP status code " + e.getStatusCode() + " for URL " + e.getUrl());
|
sendUpdate(STATUS.RIP_ERRORED, "HTTP status code " + e.getStatusCode() + " for URL " + e.getUrl());
|
||||||
} catch (IOException e) {
|
|
||||||
logger.error("Got exception while running ripper:", e);
|
|
||||||
waitForThreads();
|
|
||||||
sendUpdate(STATUS.RIP_ERRORED, e.getMessage());
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
logger.error("Got exception while running ripper:", e);
|
logger.error("Got exception while running ripper:", e);
|
||||||
waitForThreads();
|
waitForThreads();
|
||||||
@ -367,8 +482,10 @@ public abstract class AbstractRipper
|
|||||||
cleanup();
|
cleanup();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
public void cleanup() {
|
* Tries to delete any empty directories
|
||||||
|
*/
|
||||||
|
private void cleanup() {
|
||||||
if (this.workingDir.list().length == 0) {
|
if (this.workingDir.list().length == 0) {
|
||||||
// No files, delete the dir
|
// No files, delete the dir
|
||||||
logger.info("Deleting empty directory " + this.workingDir);
|
logger.info("Deleting empty directory " + this.workingDir);
|
||||||
@ -378,8 +495,16 @@ public abstract class AbstractRipper
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean sleep(int milliseconds) {
|
/**
|
||||||
|
* Pauses thread for a set amount of time.
|
||||||
|
* @param milliseconds
|
||||||
|
* Amount of time (in milliseconds) that the thread gets paused for
|
||||||
|
* @return
|
||||||
|
* True if paused successfully
|
||||||
|
* False if failed to pause/got interrupted.
|
||||||
|
*/
|
||||||
|
protected boolean sleep(int milliseconds) {
|
||||||
try {
|
try {
|
||||||
logger.debug("Sleeping " + milliseconds + "ms");
|
logger.debug("Sleeping " + milliseconds + "ms");
|
||||||
Thread.sleep(milliseconds);
|
Thread.sleep(milliseconds);
|
||||||
@ -402,7 +527,7 @@ public abstract class AbstractRipper
|
|||||||
logger.debug("THIS IS A TEST RIP");
|
logger.debug("THIS IS A TEST RIP");
|
||||||
thisIsATest = true;
|
thisIsATest = true;
|
||||||
}
|
}
|
||||||
public boolean isThisATest() {
|
protected boolean isThisATest() {
|
||||||
return thisIsATest;
|
return thisIsATest;
|
||||||
}
|
}
|
||||||
}
|
}
|
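Note: `writeDownloadedURL` above closes its writers by hand in a `finally` block. An equivalent sketch using try-with-resources (assuming the same `URLHistoryFile` field) is shorter and still closes the stream when `write` throws:

```java
// Sketch only, not the project's code: try-with-resources variant of writeDownloadedURL.
// FileWriter creates the file if it does not exist, so the explicit createNewFile()
// call is unnecessary here; the second constructor argument enables append mode.
private void writeDownloadedURL(String downloadedURL) throws IOException {
    File file = new File(URLHistoryFile);
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(file, true))) {
        bw.write(downloadedURL);  // writer is closed automatically on exit
    }
}
```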
@ -13,13 +13,17 @@ import com.rarchives.ripme.ui.RipStatusMessage;
 import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
 import com.rarchives.ripme.utils.Utils;

+/**'
+ * For ripping delicious albums off the interwebz.
+ */
 public abstract class AlbumRipper extends AbstractRipper {

-    protected Map<URL, File> itemsPending = Collections.synchronizedMap(new HashMap<URL, File>());
-    protected Map<URL, File> itemsCompleted = Collections.synchronizedMap(new HashMap<URL, File>());
-    protected Map<URL, String> itemsErrored = Collections.synchronizedMap(new HashMap<URL, String>());
+    private Map<URL, File> itemsPending = Collections.synchronizedMap(new HashMap<URL, File>());
+    private Map<URL, File> itemsCompleted = Collections.synchronizedMap(new HashMap<URL, File>());
+    private Map<URL, String> itemsErrored = Collections.synchronizedMap(new HashMap<URL, String>());

-    public AlbumRipper(URL url) throws IOException {
+    protected AlbumRipper(URL url) throws IOException {
         super(url);
     }

@ -29,15 +33,22 @@ public abstract class AlbumRipper extends AbstractRipper {
     public abstract String getHost();
     public abstract String getGID(URL url) throws MalformedURLException;

-    public boolean allowDuplicates() {
+    protected boolean allowDuplicates() {
         return false;
     }

     @Override
+    /**
+     * Returns total amount of files attempted.
+     */
     public int getCount() {
         return itemsCompleted.size() + itemsErrored.size();
     }

+    @Override
+    /**
+     * Queues multiple URLs of single images to download from a single Album URL
+     */
     public boolean addURLToDownload(URL url, File saveAs, String referrer, Map<String,String> cookies) {
         // Only download one file if this is a test.
         if (super.isThisATest() &&
@ -95,12 +106,15 @@ public abstract class AlbumRipper extends AbstractRipper {
      * @return
      *      True on success
      */
-    public boolean addURLToDownload(URL url) {
+    protected boolean addURLToDownload(URL url) {
         // Use empty prefix and empty subdirectory
         return addURLToDownload(url, "", "");
     }

     @Override
+    /**
+     * Cleans up & tells user about successful download
+     */
     public void downloadCompleted(URL url, File saveAs) {
         if (observer == null) {
             return;
@ -119,6 +133,9 @@ public abstract class AlbumRipper extends AbstractRipper {
     }

     @Override
+    /**
+     * Cleans up & tells user about failed download.
+     */
     public void downloadErrored(URL url, String reason) {
         if (observer == null) {
             return;
@ -131,6 +148,10 @@ public abstract class AlbumRipper extends AbstractRipper {
     }

     @Override
+    /**
+     * Tells user that a single file in the album they wish to download has
+     * already been downloaded in the past.
+     */
     public void downloadExists(URL url, File file) {
         if (observer == null) {
             return;
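Note: the `itemsPending` / `itemsCompleted` / `itemsErrored` maps above are wrapped with `Collections.synchronizedMap`, which only makes individual calls atomic; any iteration over them still needs an explicit lock on the map. A reminder sketch, not project code:

```java
// Iterating a synchronizedMap requires manual locking on the map object itself,
// or another thread's put() during iteration can throw ConcurrentModificationException.
synchronized (itemsPending) {
    for (Map.Entry<URL, File> entry : itemsPending.entrySet()) {
        // inspect pending downloads safely here
    }
}
```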
@ -24,12 +24,12 @@ import com.rarchives.ripme.utils.Utils;
  * Thread for downloading files.
  * Includes retry logic, observer notifications, and other goodies.
  */
-public class DownloadFileThread extends Thread {
+class DownloadFileThread extends Thread {

     private static final Logger logger = Logger.getLogger(DownloadFileThread.class);

     private String referrer = "";
-    private Map<String,String> cookies = new HashMap<String,String>();
+    private Map<String,String> cookies = new HashMap<>();

     private URL url;
     private File saveAs;
@ -23,17 +23,28 @@ public class DownloadThreadPool {
     public DownloadThreadPool(String threadPoolName) {
         initialize(threadPoolName);
     }

+    /**
+     * Initializes the threadpool.
+     * @param threadPoolName Name of the threadpool.
+     */
     private void initialize(String threadPoolName) {
         int threads = Utils.getConfigInteger("threads.size", 10);
         logger.debug("Initializing " + threadPoolName + " thread pool with " + threads + " threads");
         threadPool = (ThreadPoolExecutor) Executors.newFixedThreadPool(threads);
     }
+    /**
+     * For adding threads to execution pool.
+     * @param t
+     *      Thread to be added.
+     */
     public void addThread(Thread t) {
         threadPool.execute(t);
     }

+    /**
+     * Tries to shutdown threadpool.
+     */
     public void waitForThreads() {
         threadPool.shutdown();
         try {
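Note: taken together, the `DownloadThreadPool` API shown above is used roughly like this (the thread instance passed in is hypothetical):

```java
// Illustrative usage; the constructor reads the threads.size config (default 10).
DownloadThreadPool pool = new DownloadThreadPool("album");
pool.addThread(someDownloadThread);  // hypothetical Thread, queued on the executor
pool.waitForThreads();               // shutdown() and then await completion
```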
@ -20,7 +20,7 @@ import com.rarchives.ripme.utils.Utils;
  * Thread for downloading files.
  * Includes retry logic, observer notifications, and other goodies.
  */
-public class DownloadVideoThread extends Thread {
+class DownloadVideoThread extends Thread {

     private static final Logger logger = Logger.getLogger(DownloadVideoThread.class);

@ -136,6 +136,12 @@ public class DownloadVideoThread extends Thread {
         logger.info("[+] Saved " + url + " as " + this.prettySaveAs);
     }

+    /**
+     * @param url
+     *      Target URL
+     * @return
+     *      Returns connection length
+     */
     private int getTotalBytes(URL url) throws IOException {
         HttpURLConnection conn = (HttpURLConnection) url.openConnection();
         conn.setRequestMethod("HEAD");
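Note: the hunk is cut off inside `getTotalBytes`. Given the new javadoc ("Returns connection length") and the HEAD request, the remainder presumably reads the Content-Length header, along these lines (an assumption, not the commit's text):

```java
// Assumed remainder: a HEAD request's Content-Length gives the file size
// without downloading the body.
private int getTotalBytes(URL url) throws IOException {
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    conn.setRequestMethod("HEAD");
    return conn.getContentLength();  // -1 if the server omits the header
}
```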
@ -7,8 +7,10 @@ import java.net.URL;
 /**
  * I have no idea why I made this interface. Everything is captured within the AbstractRipper.
  * Oh well, here's to encapsulation and abstraction! (raises glass)
+ *
+ * (cheers!)
  */
-public interface RipperInterface {
+interface RipperInterface {
     void rip() throws IOException;
     boolean canRip(URL url);
     URL sanitizeURL(URL url) throws MalformedURLException;
@ -16,7 +16,7 @@ public abstract class VideoRipper extends AbstractRipper {
     private int bytesTotal = 1,
                 bytesCompleted = 1;

-    public VideoRipper(URL url) throws IOException {
+    protected VideoRipper(URL url) throws IOException {
         super(url);
     }

@ -74,6 +74,12 @@ public abstract class VideoRipper extends AbstractRipper {
         return addURLToDownload(url, saveAs);
     }

+    /**
+     * Creates & sets working directory based on URL.
+     * @param url
+     *      Target URL
+     */
     @Override
     public void setWorkingDir(URL url) throws IOException {
         String path = Utils.getWorkingDirectory().getCanonicalPath();
@ -88,12 +94,23 @@ public abstract class VideoRipper extends AbstractRipper {
         }
         logger.debug("Set working directory to: " + this.workingDir);
     }

+    /**
+     * @return
+     *      Returns % of video done downloading.
+     */
     @Override
     public int getCompletionPercentage() {
         return (int) (100 * (bytesCompleted / (float) bytesTotal));
     }

+    /**
+     * Runs if download successfully completed.
+     * @param url
+     *      Target URL
+     * @param saveAs
+     *      Path to file, including filename.
+     */
     @Override
     public void downloadCompleted(URL url, File saveAs) {
         if (observer == null) {
@ -109,6 +126,14 @@ public abstract class VideoRipper extends AbstractRipper {
             logger.error("Exception while updating observer: ", e);
         }
     }

+    /**
+     * Runs if the download errored somewhere.
+     * @param url
+     *      Target URL
+     * @param reason
+     *      Reason why the download failed.
+     */
     @Override
     public void downloadErrored(URL url, String reason) {
         if (observer == null) {
@ -117,6 +142,15 @@ public abstract class VideoRipper extends AbstractRipper {
         observer.update(this, new RipStatusMessage(STATUS.DOWNLOAD_ERRORED, url + " : " + reason));
         checkIfComplete();
     }

+    /**
+     * Runs if user tries to redownload an already existing File.
+     * @param url
+     *      Target URL
+     * @param file
+     *      Existing file
+     */
     @Override
     public void downloadExists(URL url, File file) {
         if (observer == null) {
@ -126,6 +160,11 @@ public abstract class VideoRipper extends AbstractRipper {
         checkIfComplete();
     }

+    /**
+     * Gets the status and changes it to a human-readable form.
+     * @return
+     *      Status of current download.
+     */
     @Override
     public String getStatusText() {
         StringBuilder sb = new StringBuilder();
@ -139,6 +178,10 @@ public abstract class VideoRipper extends AbstractRipper {
     }

     @Override
+    /**
+     * Sanitizes URL.
+     * Usually just returns itself.
+     */
     public URL sanitizeURL(URL url) throws MalformedURLException {
         return url;
     }
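Note: `getCompletionPercentage` relies on the cast to `float` to avoid integer division; a worked example of the arithmetic:

```java
// 100 * (3_500_000 / (float) 10_000_000) = 35.0f, truncated to 35 by the int cast.
// Without the float cast, 3_500_000 / 10_000_000 would be integer division -> 0.
int bytesCompleted = 3_500_000;
int bytesTotal = 10_000_000;
int pct = (int) (100 * (bytesCompleted / (float) bytesTotal));  // 35
```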
@ -4,7 +4,6 @@ import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.regex.Matcher;
@ -16,14 +15,13 @@ import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;

 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
 import com.rarchives.ripme.utils.Http;
 import java.util.HashMap;

 public class AerisdiesRipper extends AbstractHTMLRipper {

     private Document albumDoc = null;
-    private Map<String,String> cookies = new HashMap<String,String>();
+    private Map<String,String> cookies = new HashMap<>();


     public AerisdiesRipper(URL url) throws IOException {
@ -41,20 +39,20 @@ public class AerisdiesRipper extends AbstractHTMLRipper {

     @Override
     public String getGID(URL url) throws MalformedURLException {
-        Pattern p = Pattern.compile("^https?://www.aerisdies.com/html/lb/([a-z]*_[0-9]*_\\d)\\.html");
+        Pattern p = Pattern.compile("^https?://www.aerisdies.com/html/lb/[a-z]*_(\\d+)_\\d\\.html");
         Matcher m = p.matcher(url.toExternalForm());
         if (!m.matches()) {
             throw new MalformedURLException("Expected URL format: http://www.aerisdies.com/html/lb/albumDIG, got: " + url);
         }
-        return m.group(m.groupCount());
+        return m.group(1);
     }

     @Override
     public String getAlbumTitle(URL url) throws MalformedURLException {
         try {
             // Attempt to use album title as GID
-            String title = getFirstPage().select("title").first().text();
-            return getHost() + "_" + title.trim();
+            String title = getFirstPage().select("div > div > span[id=albumname] > a").first().text();
+            return getHost() + "_" + getGID(url) + "_" + title.trim();
         } catch (IOException e) {
             // Fall back to default album naming convention
             logger.info("Unable to find title at " + url);
@ -74,7 +72,7 @@ public class AerisdiesRipper extends AbstractHTMLRipper {

     @Override
     public List<String> getURLsFromPage(Document page) {
-        List<String> imageURLs = new ArrayList<String>();
+        List<String> imageURLs = new ArrayList<>();
         Elements albumElements = page.select("div.imgbox > a > img");
         for (Element imageBox : albumElements) {
             String imageUrl = imageBox.attr("src");
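Note: the revised Aerisdies pattern moves the capturing group so only the numeric album ID is returned, instead of the whole `name_id_digit` chunk. Against a hypothetical album URL:

```java
// Hypothetical URL, for illustration of the new capturing group.
Pattern p = Pattern.compile("^https?://www.aerisdies.com/html/lb/[a-z]*_(\\d+)_\\d\\.html");
Matcher m = p.matcher("http://www.aerisdies.com/html/lb/alb_2631_1.html");
if (m.matches()) {
    System.out.println(m.group(1));  // prints "2631"
}
```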
@ -66,7 +66,7 @@ public class BcfakesRipper extends AbstractHTMLRipper {

     @Override
     public List<String> getURLsFromPage(Document doc) {
-        List<String> imageURLs = new ArrayList<String>();
+        List<String> imageURLs = new ArrayList<>();
         for (Element thumb : doc.select("div.ngg-gallery-thumbnail > a > img")) {
             String imageURL = thumb.attr("src");
             imageURL = imageURL.replace("thumbs/thumbs_", "");
@ -1,70 +0,0 @@
-package com.rarchives.ripme.ripper.rippers;
-
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-
-import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.utils.Http;
-
-public class ButttoucherRipper extends AbstractHTMLRipper {
-
-    public ButttoucherRipper(URL url) throws IOException {
-        super(url);
-    }
-
-    @Override
-    public String getHost() {
-        return "butttoucher";
-    }
-    @Override
-    public String getDomain() {
-        return "butttoucher.com";
-    }
-
-    @Override
-    public String getGID(URL url) throws MalformedURLException {
-        Pattern p; Matcher m;
-
-        p = Pattern.compile("^.*butttoucher.com/users/([a-zA-Z0-9_\\-]{1,}).*$");
-        m = p.matcher(url.toExternalForm());
-        if (m.matches()) {
-            return m.group(1);
-        }
-        throw new MalformedURLException(
-                "Expected butttoucher.com gallery format: "
-                        + "butttoucher.com/users/<username>"
-                        + " Got: " + url);
-    }
-
-    @Override
-    public Document getFirstPage() throws IOException {
-        return Http.url(this.url).get();
-    }
-
-    @Override
-    public List<String> getURLsFromPage(Document page) {
-        List<String> thumbs = new ArrayList<String>();
-        for (Element thumb : page.select(".thumb img")) {
-            if (!thumb.hasAttr("src")) {
-                continue;
-            }
-            String smallImage = thumb.attr("src");
-            thumbs.add(smallImage.replace("m.", "."));
-        }
-        return thumbs;
-    }
-
-    @Override
-    public void downloadURL(URL url, int index) {
-        addURLToDownload(url, getPrefix(index));
-    }
-
-}
@ -0,0 +1,91 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+
+public class CfakeRipper extends AbstractHTMLRipper {
+
+    public CfakeRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return "cfake";
+    }
+
+    @Override
+    public String getDomain() {
+        return "cfake.com";
+    }
+
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        Pattern p = Pattern.compile("https?://cfake\\.com/picture/([a-zA-Z1-9_-]*)/\\d+/?$");
+        Matcher m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return m.group(1);
+        }
+        throw new MalformedURLException("Expected cfake URL format: " +
+                "cfake.com/picture/MODEL/ID - got " + url + " instead");
+    }
+
+    @Override
+    public Document getFirstPage() throws IOException {
+        // "url" is an instance field of the superclass
+        return Http.url(url).get();
+    }
+
+    @Override
+    public Document getNextPage(Document doc) throws IOException {
+        // Find next page
+        String nextUrl = "";
+        // We use comic-nav-next to the find the next page
+        Element elem = doc.select("td > div.next > a").first();
+        if (elem == null) {
+            throw new IOException("No more pages");
+        }
+        String nextPage = elem.attr("href");
+        // Some times this returns a empty string
+        // This for stops that
+        if (nextPage == "") {
+            return null;
+        }
+        else {
+            return Http.url("http://cfake.com" + nextPage).get();
+        }
+    }
+
+    @Override
+    public List<String> getURLsFromPage(Document doc) {
+        List<String> result = new ArrayList<>();
+        for (Element el : doc.select("table.display > tbody > tr > td > table > tbody > tr > td > a")) {
+            if (el.attr("href").contains("upload")) {
+                return result;
+            } else {
+                String imageSource = el.select("img").attr("src");
+                // We remove the .md from images so we download the full size image
+                // not the thumbnail ones
+                imageSource = imageSource.replace("thumbs", "photos");
+                result.add("http://cfake.com" + imageSource);
+            }
+        }
+        return result;
+    }
+
+    @Override
+    public void downloadURL(URL url, int index) {
+        addURLToDownload(url, getPrefix(index));
+    }
+}
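Note: in the new `CfakeRipper.getNextPage`, `nextPage == ""` compares references, so despite the comment ("This for stops that") it never catches an empty `href`. The value-based guard would be a sketch like:

```java
// Sketch of the intended empty-string guard (not part of this commit):
// String.isEmpty() checks the value, while == on Strings checks identity.
if (nextPage.isEmpty()) {
    return null;
}
return Http.url("http://cfake.com" + nextPage).get();
```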
@ -18,23 +18,21 @@ import com.rarchives.ripme.utils.Http;
 import com.rarchives.ripme.utils.RipUtils;

 public class ChanRipper extends AbstractHTMLRipper {
-    public static List<ChanSite> explicit_domains = Arrays.asList(
+    private static List<ChanSite> explicit_domains = Arrays.asList(
             new ChanSite(Arrays.asList("boards.4chan.org"), Arrays.asList("4cdn.org", "is.4chan.org", "is2.4chan.org")),
-            new ChanSite(Arrays.asList("archive.moe"), Arrays.asList("data.archive.moe")),
             new ChanSite(Arrays.asList("4archive.org"), Arrays.asList("imgur.com")),
-            new ChanSite(Arrays.asList("archive.4plebs.org"), Arrays.asList("img.4plebs.org")),
-            new ChanSite(Arrays.asList("fgts.jp"), Arrays.asList("dat.fgtsi.org"))
+            new ChanSite(Arrays.asList("archive.4plebs.org"), Arrays.asList("img.4plebs.org"))
     );

-    public static List<String> url_piece_blacklist = Arrays.asList(
+    private static List<String> url_piece_blacklist = Arrays.asList(
             "=http",
             "http://imgops.com/",
             "iqdb.org",
             "saucenao.com"
     );

-    public ChanSite chanSite;
-    public Boolean generalChanSite = true;
+    private ChanSite chanSite;
+    private Boolean generalChanSite = true;

     public ChanRipper(URL url) throws IOException {
         super(url);
@ -66,13 +64,18 @@ public class ChanRipper extends AbstractHTMLRipper {
         try {
             // Attempt to use album title as GID
             Document doc = getFirstPage();
-            String subject = doc.select(".post.op > .postinfo > .subject").first().text();
-            return getHost() + "_" + getGID(url) + "_" + subject;
+            try {
+                String subject = doc.select(".post.op > .postinfo > .subject").first().text();
+                return getHost() + "_" + getGID(url) + "_" + subject;
+            } catch (NullPointerException e) {
+                logger.warn("Failed to get thread title from " + url);
+            }
         } catch (Exception e) {
             // Fall back to default album naming convention
             logger.warn("Failed to get album title from " + url, e);
         }
-        return super.getAlbumTitle(url);
+        // Fall back on the GID
+        return getHost() + "_" + getGID(url);
     }

     @Override
@ -114,6 +117,12 @@ public class ChanRipper extends AbstractHTMLRipper {
             if (m.matches()) {
                 return m.group(1);
             }
+            // xchan
+            p = Pattern.compile("^.*\\.[a-z]{1,3}/board/[a-zA-Z0-9]+/thread/([0-9]+)/?.*$");
+            m = p.matcher(u);
+            if (m.matches()) {
+                return m.group(1);
+            }
         }

         throw new MalformedURLException(
@ -143,7 +152,7 @@ public class ChanRipper extends AbstractHTMLRipper {
     }
     @Override
     public List<String> getURLsFromPage(Document page) {
-        List<String> imageURLs = new ArrayList<String>();
+        List<String> imageURLs = new ArrayList<>();
         Pattern p; Matcher m;
         for (Element link : page.select("a")) {
             if (!link.hasAttr("href")) {
@ -208,6 +217,6 @@ public class ChanRipper extends AbstractHTMLRipper {

     @Override
     public void downloadURL(URL url, int index) {
-        addURLToDownload(url, getPrefix(index), "", this.url.toString(), null);
+        addURLToDownload(url, getPrefix(index));
     }
 }
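Note: the added "xchan" pattern in `ChanRipper.getGID` extracts the thread ID from board-style paths. Against a made-up thread URL:

```java
// Hypothetical URL, for illustration of the added xchan pattern.
Pattern p = Pattern.compile("^.*\\.[a-z]{1,3}/board/[a-zA-Z0-9]+/thread/([0-9]+)/?.*$");
Matcher m = p.matcher("https://example.ch/board/b/thread/123456");
if (m.matches()) {
    System.out.println(m.group(1));  // prints "123456"
}
```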
@ -1,180 +0,0 @@
-package com.rarchives.ripme.ripper.rippers;
-
-import java.io.File;
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-
-import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
-import com.rarchives.ripme.utils.Http;
-
-public class CheebyRipper extends AbstractHTMLRipper {
-
-    private int offset = 0;
-    private Map<String, Integer> albumSets = new HashMap<String, Integer>();
-
-    public CheebyRipper(URL url) throws IOException {
-        super(url);
-    }
-
-    @Override
-    public String getHost() {
-        return "cheeby";
-    }
-    @Override
-    public String getDomain() {
-        return "cheeby.com";
-    }
-
-    @Override
-    public String getGID(URL url) throws MalformedURLException {
-        Pattern p = Pattern.compile("^https?://[w.]*cheeby.com/u/([a-zA-Z0-9\\-_]{3,}).*$");
-        Matcher m = p.matcher(url.toExternalForm());
-        if (m.matches()) {
-            return m.group(1);
-        }
-        throw new MalformedURLException("cheeby user not found in " + url + ", expected http://cheeby.com/u/username");
-    }
-
-    @Override
-    public URL sanitizeURL(URL url) throws MalformedURLException {
-        return new URL("http://cheeby.com/u/" + getGID(url) + "/pics");
-    }
-
-    @Override
-    public Document getFirstPage() throws IOException {
-        String url = this.url + "?limit=10&offset=0";
-        return Http.url(url)
-                   .get();
-    }
-
-    @Override
-    public Document getNextPage(Document doc) throws IOException {
-        sleep(500);
-        offset += 1;
-        String url = this.url + "?p=" + offset;
-        Document nextDoc = Http.url(url).get();
-        if (nextDoc.select("div.i a img").size() == 0) {
-            throw new IOException("No more images to fetch");
-        }
-        return nextDoc;
-    }
-
-    @Override
-    public void downloadURL(URL url, int index) {
-        // Not implmeneted here
-    }
-
-    @Override
-    public List<String> getURLsFromPage(Document page) {
-        // Not implemented here
-        return null;
-    }
-
-    public List<Image> getImagesFromPage(Document page) {
-        List<Image> imageURLs = new ArrayList<Image>();
-        for (Element image : page.select("div.i a img")) {
-            // Get image URL
-            String imageURL = image.attr("src");
-            imageURL = imageURL.replace("s.", ".");
-
-            // Get "album" from image link
-            String href = image.parent().attr("href");
-            while (href.endsWith("/")) {
-                href = href.substring(0, href.length() - 2);
-            }
-            String[] hrefs = href.split("/");
-            String prefix = hrefs[hrefs.length - 1];
-
-            // Keep track of how many images are in this album
-            int albumSetCount = 0;
-            if (albumSets.containsKey(prefix)) {
-                albumSetCount = albumSets.get(prefix);
-            }
-            albumSetCount++;
-            albumSets.put(prefix, albumSetCount);
-
-            imageURLs.add(new Image(imageURL, prefix, albumSetCount));
-
-        }
-        return imageURLs;
-    }
-
-    @Override
-    public void rip() throws IOException {
-        logger.info("Retrieving " + this.url);
-        sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
-        Document doc = getFirstPage();
-
-        while (doc != null) {
-            List<Image> images = getImagesFromPage(doc);
-
-            if (images.size() == 0) {
-                throw new IOException("No images found at " + doc.location());
-            }
-
-            for (Image image : images) {
-                if (isStopped()) {
-                    break;
-                }
-                // Don't create subdirectory if "album" only has 1 image
-                if (albumSets.get(image.prefix) > 1) {
-                    addURLToDownload(new URL(image.url), getPrefix(image.index), image.prefix);
-                }
-                else {
-                    addURLToDownload(new URL(image.url));
-                }
-            }
-
-            if (isStopped()) {
-                break;
-            }
-
-            try {
-                sendUpdate(STATUS.LOADING_RESOURCE, "next page");
-                doc = getNextPage(doc);
-            } catch (IOException e) {
-                logger.info("Can't get next page: " + e.getMessage());
-                break;
-            }
-        }
-
-        // If they're using a thread pool, wait for it.
-        if (getThreadPool() != null) {
-            getThreadPool().waitForThreads();
-        }
-        waitForThreads();
-
-        // Delete empty subdirectories
-        for (String prefix : albumSets.keySet()) {
-            if (prefix.trim().equals("")) {
-                continue;
-            }
-            File f = new File(this.workingDir, prefix);
-            if (f.list() != null && f.list().length == 0) {
-                logger.info("Deleting empty directory: " + f.getAbsolutePath());
-                f.delete();
-            }
-        }
-    }
-
-    private class Image {
-        String url, prefix;
-        int index;
-        public Image(String url, String prefix, int index) {
-            this.url = url;
-            this.prefix = prefix;
-            this.index = index;
-        }
-    }
-}
@@ -6,118 +6,119 @@ import java.net.URL;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
 
 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
 import com.rarchives.ripme.utils.Http;
 
 public class CheveretoRipper extends AbstractHTMLRipper {
+    private static final Map<String, String> CONSENT_COOKIE;
+    static {
+        CONSENT_COOKIE = new TreeMap<String, String>();
+        CONSENT_COOKIE.put("AGREE_CONSENT", "1");
+    }
 
     public CheveretoRipper(URL url) throws IOException {
         super(url);
     }
 
-    public static List<String> explicit_domains_1 = Arrays.asList("hushpix.com", "tag-fox.com");
-    @Override
-    public String getHost() {
-        String host = url.toExternalForm().split("/")[2];
-        return host;
-    }
-
-    @Override
-    public String getDomain() {
-        String host = url.toExternalForm().split("/")[2];
-        return host;
-    }
-
-    @Override
-    public boolean canRip(URL url) {
-        String url_name = url.toExternalForm();
-        if (explicit_domains_1.contains(url_name.split("/")[2]) == true) {
-            Pattern pa = Pattern.compile("(?:https?://)?(?:www\\.)?[a-z1-9-]*\\.[a-z1-9]*/album/([a-zA-Z1-9]*)/?$");
-            Matcher ma = pa.matcher(url.toExternalForm());
-            if (ma.matches()) {
-                return true;
-            }
-        }
-        return false;
-    }
-
-    @Override
-    public String getAlbumTitle(URL url) throws MalformedURLException {
-        try {
-            // Attempt to use album title as GID
-            Element titleElement = getFirstPage().select("meta[property=og:title]").first();
-            String title = titleElement.attr("content");
-            title = title.substring(title.lastIndexOf('/') + 1);
-            return getHost() + "_" + title.trim();
-        } catch (IOException e) {
-            // Fall back to default album naming convention
-            logger.info("Unable to find title at " + url);
-        }
-        return super.getAlbumTitle(url);
-    }
-
-    @Override
-    public String getGID(URL url) throws MalformedURLException {
-        Pattern p = Pattern.compile("(?:https?://)?(?:www\\.)?[a-z1-9-]*\\.[a-z1-9]*/album/([a-zA-Z1-9]*)/?$");
-        Matcher m = p.matcher(url.toExternalForm());
-        if (m.matches()) {
-            return m.group(1);
-        }
-        throw new MalformedURLException("Expected chevereto URL format: " +
-            "site.domain/album/albumName or site.domain/username/albums- got " + url + " instead");
-    }
-
-    @Override
-    public Document getFirstPage() throws IOException {
-        // "url" is an instance field of the superclass
-        return Http.url(url).get();
-    }
-
-    @Override
-    public Document getNextPage(Document doc) throws IOException {
-        // Find next page
-        String nextUrl = "";
-        // We use comic-nav-next to the find the next page
-        Element elem = doc.select("li.pagination-next > a").first();
-        if (elem == null) {
-            throw new IOException("No more pages");
-        }
-        String nextPage = elem.attr("href");
-        // Some times this returns a empty string
-        // This for stops that
-        if (nextPage == "") {
-            return null;
-        }
-        else {
-            return Http.url(nextPage).get();
-        }
-    }
-
-    @Override
-    public List<String> getURLsFromPage(Document doc) {
-        List<String> result = new ArrayList<String>();
-        for (Element el : doc.select("a.image-container > img")) {
-            String imageSource = el.attr("src");
-            // We remove the .md from images so we download the full size image
-            // not the medium ones
-            imageSource = imageSource.replace(".md", "");
-            result.add(imageSource);
-        }
-        return result;
-    }
-
-    @Override
-    public void downloadURL(URL url, int index) {
-        addURLToDownload(url, getPrefix(index));
-    }
-
+    private static List<String> explicit_domains_1 = Arrays.asList("hushpix.com", "tag-fox.com", "gwarchives.com");
+    @Override
+    public String getHost() {
+        return url.toExternalForm().split("/")[2];
     }
 
+    @Override
+    public String getDomain() {
+        return url.toExternalForm().split("/")[2];
+    }
+
+    @Override
+    public boolean canRip(URL url) {
+        String url_name = url.toExternalForm();
+        if (explicit_domains_1.contains(url_name.split("/")[2])) {
+            Pattern pa = Pattern.compile("(?:https?://)?(?:www\\.)?[a-z1-9-]*\\.[a-z1-9]*/album/([a-zA-Z1-9]*)/?$");
+            Matcher ma = pa.matcher(url.toExternalForm());
+            if (ma.matches()) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    @Override
+    public String getAlbumTitle(URL url) throws MalformedURLException {
+        try {
+            // Attempt to use album title as GID
+            Element titleElement = getFirstPage().select("meta[property=og:title]").first();
+            String title = titleElement.attr("content");
+            title = title.substring(title.lastIndexOf('/') + 1);
+            return getHost() + "_" + title.trim();
+        } catch (IOException e) {
+            // Fall back to default album naming convention
+            logger.info("Unable to find title at " + url);
+        }
+        return super.getAlbumTitle(url);
+    }
+
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        Pattern p = Pattern.compile("(?:https?://)?(?:www\\.)?[a-z1-9-]*\\.[a-z1-9]*/album/([a-zA-Z1-9]*)/?$");
+        Matcher m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return m.group(1);
+        }
+        throw new MalformedURLException("Expected chevereto URL format: " +
+            "site.domain/album/albumName or site.domain/username/albums- got " + url + " instead");
+    }
+
+    @Override
+    public Document getFirstPage() throws IOException {
+        // "url" is an instance field of the superclass
+        return Http.url(url).cookies(CONSENT_COOKIE).get();
+    }
+
+    @Override
+    public Document getNextPage(Document doc) throws IOException {
+        // Find next page
+        String nextUrl = "";
+        // We use comic-nav-next to the find the next page
+        Element elem = doc.select("li.pagination-next > a").first();
+        if (elem == null) {
+            throw new IOException("No more pages");
+        }
+        String nextPage = elem.attr("href");
+        // Some times this returns a empty string
+        // This for stops that
+        if (nextPage == "") {
+            return null;
+        } else {
+            return Http.url(nextPage).cookies(CONSENT_COOKIE).get();
+        }
+    }
+
+    @Override
+    public List<String> getURLsFromPage(Document doc) {
+        List<String> result = new ArrayList<>();
+        for (Element el : doc.select("a.image-container > img")) {
+            String imageSource = el.attr("src");
+            // We remove the .md from images so we download the full size image
+            // not the medium ones
+            imageSource = imageSource.replace(".md", "");
+            result.add(imageSource);
+        }
+        return result;
+    }
+
+    @Override
+    public void downloadURL(URL url, int index) {
+        addURLToDownload(url, getPrefix(index));
+    }
+}
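The heart of this change is the `CONSENT_COOKIE` map threaded through `getFirstPage` and `getNextPage`. A sketch of the same idea against plain jsoup rather than the project's `Http` wrapper; the cookie name comes from the diff, while the class and method names here are illustrative:

    import java.io.IOException;
    import java.util.Map;
    import java.util.TreeMap;
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;

    // Hedged sketch: send a site-consent cookie with every page fetch.
    public class ConsentCookieFetch {
        private static final Map<String, String> CONSENT_COOKIE = new TreeMap<>();
        static {
            CONSENT_COOKIE.put("AGREE_CONSENT", "1"); // cookie name taken from the diff
        }

        static Document fetch(String pageUrl) throws IOException {
            // jsoup attaches the map entries as request cookies
            return Jsoup.connect(pageUrl).cookies(CONSENT_COOKIE).get();
        }
    }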
@@ -32,8 +32,8 @@ public class DeviantartRipper extends AbstractHTMLRipper {
     private static final int PAGE_SLEEP_TIME = 3000,
                              IMAGE_SLEEP_TIME = 2000;
 
-    private Map<String,String> cookies = new HashMap<String,String>();
-    private Set<String> triedURLs = new HashSet<String>();
+    private Map<String,String> cookies = new HashMap<>();
+    private Set<String> triedURLs = new HashSet<>();
 
     public DeviantartRipper(URL url) throws IOException {
         super(url);
@@ -63,7 +63,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
             u += "gallery/?";
         }
 
-        Pattern p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.deviantart\\.com/favou?rites/([0-9]+)/*?$");
+        Pattern p = Pattern.compile("^https?://([a-zA-Z0-9\\-]+)\\.deviantart\\.com/favou?rites/([0-9]+)/*?$");
         Matcher m = p.matcher(url.toExternalForm());
         if (!m.matches()) {
             String subdir = "/";
@@ -88,18 +88,18 @@ public class DeviantartRipper extends AbstractHTMLRipper {
                 return m.group(1);
             }
         }
-        p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.deviantart\\.com/gallery/([0-9]{1,}).*$");
+        p = Pattern.compile("^https?://([a-zA-Z0-9\\-]+)\\.deviantart\\.com/gallery/([0-9]+).*$");
         m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             // Subgallery
             return m.group(1) + "_" + m.group(2);
         }
-        p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.deviantart\\.com/favou?rites/([0-9]+)/.*?$");
+        p = Pattern.compile("^https?://([a-zA-Z0-9\\-]+)\\.deviantart\\.com/favou?rites/([0-9]+)/.*?$");
         m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             return m.group(1) + "_faves_" + m.group(2);
         }
-        p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.deviantart\\.com/favou?rites/?$");
+        p = Pattern.compile("^https?://([a-zA-Z0-9\\-]+)\\.deviantart\\.com/favou?rites/?$");
         m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             // Subgallery
@@ -121,14 +121,14 @@ public class DeviantartRipper extends AbstractHTMLRipper {
                 .cookies(cookies)
                 .get();
     }
-    public String jsonToImage(Document page,String id) {
+    private String jsonToImage(Document page, String id) {
         Elements js = page.select("script[type=\"text/javascript\"]");
         for (Element tag : js) {
             if (tag.html().contains("window.__pageload")) {
                 try {
                     String script = tag.html();
                     script = script.substring(script.indexOf("window.__pageload"));
-                    if (script.indexOf(id) < 0) {
+                    if (!script.contains(id)) {
                         continue;
                     }
                     script = script.substring(script.indexOf(id));
@@ -144,7 +144,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
     }
     @Override
     public List<String> getURLsFromPage(Document page) {
-        List<String> imageURLs = new ArrayList<String>();
+        List<String> imageURLs = new ArrayList<>();
 
         // Iterate over all thumbnails
         for (Element thumb : page.select("div.zones-container span.thumb")) {
@@ -194,7 +194,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
     }
     @Override
     public List<String> getDescriptionsFromPage(Document page) {
-        List<String> textURLs = new ArrayList<String>();
+        List<String> textURLs = new ArrayList<>();
         // Iterate over all thumbnails
         for (Element thumb : page.select("div.zones-container span.thumb")) {
             logger.info(thumb.attr("href"));
@@ -257,9 +257,9 @@ public class DeviantartRipper extends AbstractHTMLRipper {
      * @return Full-size image URL
      * @throws Exception If it can't find the full-size URL
      */
-    public static String thumbToFull(String thumb, boolean throwException) throws Exception {
+    private static String thumbToFull(String thumb, boolean throwException) throws Exception {
         thumb = thumb.replace("http://th", "http://fc");
-        List<String> fields = new ArrayList<String>(Arrays.asList(thumb.split("/")));
+        List<String> fields = new ArrayList<>(Arrays.asList(thumb.split("/")));
         fields.remove(4);
         if (!fields.get(4).equals("f") && throwException) {
             // Not a full-size image
@@ -339,7 +339,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
      * @param page Page the thumbnail is retrieved from
      * @return Highest-resolution version of the image based on thumbnail URL and the page.
      */
-    public String smallToFull(String thumb, String page) {
+    private String smallToFull(String thumb, String page) {
         try {
             // Fetch the image page
             Response resp = Http.url(page)
@@ -373,7 +373,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
         }
         cookieString = cookieString.substring(0,cookieString.length() - 1);
         con.setRequestProperty("Cookie",cookieString);
-        con.setRequestProperty("User-Agent",this.USER_AGENT);
+        con.setRequestProperty("User-Agent", USER_AGENT);
         con.setInstanceFollowRedirects(true);
         con.connect();
         int code = con.getResponseCode();
@@ -406,7 +406,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
      */
     private Map<String, String> loginToDeviantart() throws IOException {
         // Populate postData fields
-        Map<String,String> postData = new HashMap<String,String>();
+        Map<String,String> postData = new HashMap<>();
         String username = Utils.getConfigString("deviantart.username", new String(Base64.decode("Z3JhYnB5")));
         String password = Utils.getConfigString("deviantart.password", new String(Base64.decode("ZmFrZXJz")));
         if (username == null || password == null) {
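Most of the regex edits above swap `{1,}` for `+`. The two quantifiers both mean "one or more", so behavior is unchanged; a quick standalone check (the URL is made up):

    import java.util.regex.Pattern;

    // Hedged sketch: "{1,}" and "+" match identically, so the edit is cosmetic.
    public class QuantifierEquivalence {
        public static void main(String[] args) {
            String url = "https://someartist.deviantart.com/gallery/12345";
            Pattern verbose = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.deviantart\\.com/gallery/([0-9]{1,}).*$");
            Pattern concise = Pattern.compile("^https?://([a-zA-Z0-9\\-]+)\\.deviantart\\.com/gallery/([0-9]+).*$");
            System.out.println(verbose.matcher(url).matches()); // true
            System.out.println(concise.matcher(url).matches()); // true
        }
    }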
@@ -72,7 +72,7 @@ public class DrawcrowdRipper extends AbstractHTMLRipper {
 
     @Override
     public List<String> getURLsFromPage(Document page) {
-        List<String> imageURLs = new ArrayList<String>();
+        List<String> imageURLs = new ArrayList<>();
         for (Element thumb : page.select("div.item.asset img")) {
             String image = thumb.attr("src");
             image = image
@@ -0,0 +1,74 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+
+public class DribbbleRipper extends AbstractHTMLRipper {
+
+    public DribbbleRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return "dribbble";
+    }
+    @Override
+    public String getDomain() {
+        return "dribbble.com";
+    }
+
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        Pattern p = Pattern.compile("^https?://[wm.]*dribbble\\.com/([a-zA-Z0-9]+).*$");
+        Matcher m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return m.group(1);
+        }
+        throw new MalformedURLException("Expected dribbble.com URL format: " +
+                "dribbble.com/albumid - got " + url + "instead");
+    }
+
+    @Override
+    public Document getFirstPage() throws IOException {
+        return Http.url(url).get();
+    }
+    @Override
+    public Document getNextPage(Document doc) throws IOException {
+        // Find next page
+        Elements hrefs = doc.select("a.next_page");
+        if (hrefs.size() == 0) {
+            throw new IOException("No more pages");
+        }
+        String nextUrl = "https://www.dribbble.com" + hrefs.first().attr("href");
+        sleep(500);
+        return Http.url(nextUrl).get();
+    }
+    @Override
+    public List<String> getURLsFromPage(Document doc) {
+        List<String> imageURLs = new ArrayList<>();
+        for (Element thumb : doc.select("a.dribbble-link > picture > source")) {
+            // nl skips thumbnails
+            if ( thumb.attr("srcset").contains("teaser")) continue;
+            String image = thumb.attr("srcset").replace("_1x", "");
+            imageURLs.add(image);
+        }
+        return imageURLs;
+    }
+    @Override
+    public void downloadURL(URL url, int index) {
+        addURLToDownload(url, getPrefix(index));
+    }
+}
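The new ripper's `getURLsFromPage` assumes two things about Dribbble markup: teaser thumbnails are recognizable by the substring "teaser" in the `srcset`, and stripping the `_1x` suffix from a variant URL yields the full-size asset. A standalone illustration of just that string handling; the CDN URL below is invented for the example:

    // Hedged sketch of the srcset handling; the URL is illustrative only.
    public class SrcsetHandling {
        public static void main(String[] args) {
            String srcset = "https://cdn.example.com/users/1/shots/42/shot_1x.png";
            if (!srcset.contains("teaser")) {                 // skip thumbnail teasers
                String fullSize = srcset.replace("_1x", "");  // drop the 1x variant marker
                System.out.println(fullSize);                 // .../shots/42/shot.png
            }
        }
    }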
@@ -5,167 +5,137 @@ import com.rarchives.ripme.ripper.DownloadThreadPool;
 import com.rarchives.ripme.utils.Http;
 import com.rarchives.ripme.utils.Utils;
 import java.io.IOException;
-import java.io.UnsupportedEncodingException;
 import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
-import java.net.URLDecoder;
 import java.util.ArrayList;
 import java.util.List;
-import java.util.logging.Level;
-import java.util.logging.Logger;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.apache.log4j.Logger;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;
 
-public class E621Ripper extends AbstractHTMLRipper {
-    public static final int POOL_IMAGES_PER_PAGE = 24;
-
-    private DownloadThreadPool e621ThreadPool = new DownloadThreadPool("e621");
-
-    public E621Ripper(URL url) throws IOException {
-        super(url);
-    }
-
-    @Override
-    public DownloadThreadPool getThreadPool() {
-        return e621ThreadPool;
-    }
-
-    @Override
-    public String getDomain() {
-        return "e621.net";
-    }
-
-    @Override
-    public String getHost() {
-        return "e621";
-    }
-
-    @Override
-    public Document getFirstPage() throws IOException {
-        if (url.getPath().startsWith("/pool/show/")) {
-            return Http.url("https://e621.net/pool/show/" + getTerm(url)).get();
-        } else {
-            return Http.url("https://e621.net/post/index/1/" + getTerm(url)).get();
-        }
-    }
-
-    @Override
-    public List<String> getURLsFromPage(Document page) {
-        Elements elements = page.select("#post-list .thumb a,#pool-show .thumb a");
-        List<String> res = new ArrayList<String>(elements.size());
-
-        if (page.getElementById("pool-show") != null) {
-            int index = 0;
-
-            Element e = page.getElementById("paginator");
-            if (e != null) {
-                e = e.getElementsByClass("current").first();
-                if (e != null) {
-                    index = (Integer.parseInt(e.text()) - 1) * POOL_IMAGES_PER_PAGE;
-                }
-            }
-
-            for (Element e_ : elements) {
-                res.add(e_.absUrl("href") + "#" + ++index);
-            }
-
-        } else {
-            for (Element e : elements) {
-                res.add(e.absUrl("href") + "#" + e.child(0).attr("id").substring(1));
-            }
-        }
-
-        return res;
-    }
-
-    @Override
-    public Document getNextPage(Document page) throws IOException {
-        for (Element e : page.select("#paginator a")) {
-            if (e.attr("rel").equals("next")) {
-                return Http.url(e.absUrl("href")).get();
-            }
-        }
-
-        return null;
-    }
-
-    @Override
-    public void downloadURL(final URL url, int index) {
-        e621ThreadPool.addThread(new Thread(new Runnable() {
-            public void run() {
-                try {
-                    Document page = Http.url(url).get();
-                    Element e = page.getElementById("image");
-
-                    if (e != null) {
-                        addURLToDownload(new URL(e.absUrl("src")), Utils.getConfigBoolean("download.save_order", true) ? url.getRef() + "-" : "");
-                    } else if ((e = page.select(".content object>param[name=\"movie\"]").first()) != null) {
-                        addURLToDownload(new URL(e.absUrl("value")), Utils.getConfigBoolean("download.save_order", true) ? url.getRef() + "-" : "");
-                    } else {
-                        Logger.getLogger(E621Ripper.class.getName()).log(Level.WARNING, "Unsupported media type - please report to program author: " + url.toString());
-                    }
-                } catch (IOException ex) {
-                    Logger.getLogger(E621Ripper.class.getName()).log(Level.SEVERE, null, ex);
-                }
-            }
-        }));
-    }
-
-    private String getTerm(URL url) throws MalformedURLException {
-        String query = url.getQuery();
-
-        if (query != null) {
-            return Utils.parseUrlQuery(query, "tags");
-        }
-
-        if (query == null) {
-            if ((query = url.getPath()).startsWith("/post/index/")) {
-                query = query.substring(12);
-
-                int pos = query.indexOf('/');
-                if (pos == -1) {
-                    return null;
-                }
-
-                // skip page number
-                query = query.substring(pos + 1);
-
-                if (query.endsWith("/")) {
-                    query = query.substring(0, query.length() - 1);
-                }
-
-                try {
-                    return URLDecoder.decode(query, "UTF-8");
-                } catch (UnsupportedEncodingException e) {
-                    // Shouldn't happen since UTF-8 is required to be supported
-                    throw new RuntimeException(e);
-                }
-
-            } else if (query.startsWith("/pool/show/")) {
-                query = query.substring(11);
-
-                if (query.endsWith("/")) {
-                    query = query.substring(0, query.length() - 1);
-                }
-
-                return query;
-            }
-        }
-
-        return null;
-    }
-
-    @Override
-    public String getGID(URL url) throws MalformedURLException {
-        String prefix = "";
-        if (url.getPath().startsWith("/pool/show/")) {
-            prefix = "pool_";
-        } else {
-            prefix = "term_";
-        }
-
-        return Utils.filesystemSafe(prefix + getTerm(url));
-    }
+public class E621Ripper extends AbstractHTMLRipper{
+    private static final Logger logger = Logger.getLogger(E621Ripper.class);
+
+    private static Pattern gidPattern=null;
+    private static Pattern gidPattern2=null;
+    private static Pattern gidPatternPool=null;
+
+    private DownloadThreadPool e621ThreadPool=new DownloadThreadPool("e621");
+
+    public E621Ripper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public DownloadThreadPool getThreadPool() {
+        return e621ThreadPool;
+    }
+
+    @Override
+    public String getDomain() {
+        return "e621.net";
+    }
+
+    @Override
+    public String getHost() {
+        return "e621";
+    }
+
+    @Override
+    public Document getFirstPage() throws IOException {
+        if(url.getPath().startsWith("/pool/show/"))
+            return Http.url("https://e621.net/pool/show/"+getTerm(url)).get();
+        else
+            return Http.url("https://e621.net/post/index/1/"+getTerm(url)).get();
+    }
+
+    private String getFullSizedImage(String url) {
+        try {
+            return Http.url("https://e621.net" + url).get().select("div > img#image").attr("src");
+        } catch (IOException e) {
+            logger.error("Unable to get full sized image from " + url);
+            return null;
+        }
+    }
+
+    @Override
+    public List<String> getURLsFromPage(Document page) {
+        Elements elements = page.select("div > span.thumb > a");
+        List<String> res = new ArrayList<>();
+
+        for(Element e:elements) {
+            if (!e.attr("href").isEmpty()) {
+                String fullSizedImage = getFullSizedImage(e.attr("href"));
+                if (fullSizedImage != null && !fullSizedImage.equals("")) {
+                    res.add(getFullSizedImage(e.attr("href")));
+                }
+            }
+        }
+
+        return res;
+    }
+
+    @Override
+    public Document getNextPage(Document page) throws IOException {
+        if (page.select("a.next_page") != null) {
+            return Http.url("https://e621.net" + page.select("a.next_page").attr("href")).get();
+        } else {
+            throw new IOException("No more pages");
+        }
+    }
+
+    @Override
+    public void downloadURL(final URL url, int index) {
+        addURLToDownload(url, getPrefix(index));
+    }
+
+    private String getTerm(URL url) throws MalformedURLException{
+        if(gidPattern==null)
+            gidPattern=Pattern.compile("^https?://(www\\.)?e621\\.net/post/index/[^/]+/([a-zA-Z0-9$_.+!*'(),%-]+)(/.*)?(#.*)?$");
+        if(gidPatternPool==null)
+            gidPatternPool=Pattern.compile("^https?://(www\\.)?e621\\.net/pool/show/([a-zA-Z0-9$_.+!*'(),%-]+)(\\?.*)?(/.*)?(#.*)?$");
+
+        Matcher m = gidPattern.matcher(url.toExternalForm());
+        if(m.matches())
+            return m.group(2);
+
+        m = gidPatternPool.matcher(url.toExternalForm());
+        if(m.matches())
+            return m.group(2);
+
+        throw new MalformedURLException("Expected e621.net URL format: e621.net/post/index/1/searchterm - got "+url+" instead");
+    }
+
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        try {
+            String prefix="";
+            if(url.getPath().startsWith("/pool/show/"))
+                prefix="pool_";
+
+            return Utils.filesystemSafe(prefix+new URI(getTerm(url)).getPath());
+        } catch (URISyntaxException ex) {
+            logger.error(ex);
+        }
+
+        throw new MalformedURLException("Expected e621.net URL format: e621.net/post/index/1/searchterm - got "+url+" instead");
+    }
+
+    @Override
+    public URL sanitizeURL(URL url) throws MalformedURLException {
+        if(gidPattern2==null)
+            gidPattern2=Pattern.compile("^https?://(www\\.)?e621\\.net/post/search\\?tags=([a-zA-Z0-9$_.+!*'(),%-]+)(/.*)?(#.*)?$");
+
+        Matcher m = gidPattern2.matcher(url.toExternalForm());
+        if(m.matches())
+            return new URL("https://e621.net/post/index/1/"+m.group(2).replace("+","%20"));
+
+        return url;
+    }
 }
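One caveat worth flagging in the rewritten `getNextPage`: jsoup's `select()` returns an empty `Elements` collection, never null, so `page.select("a.next_page") != null` is always true and the `IOException` branch can never fire. A sketch of a guard that does terminate, under the same markup assumption; the class and method names are illustrative:

    import java.io.IOException;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;

    // Hedged sketch: first() yields null when no "a.next_page" link exists.
    public class NextPageGuard {
        static String nextPageUrl(Document page) throws IOException {
            Element next = page.select("a.next_page").first();
            if (next == null) {
                throw new IOException("No more pages");
            }
            return "https://e621.net" + next.attr("href");
        }
    }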
@@ -39,7 +39,7 @@ public class EHentaiRipper {
     // Current HTML document
     private Document albumDoc = null;
 
-    private static final Map<String,String> cookies = new HashMap<String,String>();
+    private static final Map<String,String> cookies = new HashMap<>();
     static {
         cookies.put("nw", "1");
         cookies.put("tip", "1");
@@ -162,7 +162,7 @@ public class EHentaiRipper {
 
     @Override
     public List<String> getURLsFromPage(Document page) {
-        List<String> imageURLs = new ArrayList<String>();
+        List<String> imageURLs = new ArrayList<>();
         Elements thumbs = page.select("#gdt > .gdtm a");
         // Iterate over images on page
         for (Element thumb : thumbs) {
@@ -193,7 +193,7 @@ public class EHentaiRipper {
         private int index;
         private File workingDir;
 
-        public EHentaiImageThread(URL url, int index, File workingDir) {
+        EHentaiImageThread(URL url, int index, File workingDir) {
            super();
            this.url = url;
            this.index = index;
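The `EHentaiImageThread` constructor losing its `public` modifier makes it package-private, which is sufficient when only the enclosing ripper ever instantiates the thread. In miniature, with invented names:

    // Hedged sketch of the visibility change; names are illustrative.
    class ImageThread extends Thread {
        private final int index;

        ImageThread(int index) { // package-private: callable from this package only
            this.index = index;
        }
    }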
@@ -23,7 +23,12 @@ import com.rarchives.ripme.utils.Http;
 public class EightmusesRipper extends AbstractHTMLRipper {
 
     private Document albumDoc = null;
-    private Map<String,String> cookies = new HashMap<String,String>();
+    private Map<String,String> cookies = new HashMap<>();
+    // TODO put up a wiki page on using maps to store titles
+    // the map for storing the title of each album when downloading sub albums
+    private Map<URL,String> urlTitles = new HashMap<>();
+
+    private Boolean rippingSubalbums = false;
 
     public EightmusesRipper(URL url) throws IOException {
         super(url);
@@ -76,33 +81,62 @@ public class EightmusesRipper extends AbstractHTMLRipper {
 
     @Override
     public List<String> getURLsFromPage(Document page) {
-        List<String> imageURLs = new ArrayList<String>();
-        if (page.select(".preview > span").size() > 0) {
+        List<String> imageURLs = new ArrayList<>();
+        // get the first image link on the page and check if the last char in it is a number
+        // if it is a number then we're ripping a comic if not it's a subalbum
+        String firstImageLink = page.select("div.gallery > a.t-hover").first().attr("href");
+        Pattern p = Pattern.compile("/comix/picture/([a-zA-Z0-9\\-_/]*/)?\\d+");
+        Matcher m = p.matcher(firstImageLink);
+        if (!m.matches()) {
+            logger.info("Ripping subalbums");
             // Page contains subalbums (not images)
-            Elements albumElements = page.select("a.preview");
+            Elements albumElements = page.select("div.gallery > a.t-hover");
             List<Element> albumsList = albumElements.subList(0, albumElements.size());
             Collections.reverse(albumsList);
             // Iterate over elements in reverse order
             for (Element subalbum : albumsList) {
                 String subUrl = subalbum.attr("href");
-                subUrl = subUrl.replaceAll("\\.\\./", "");
-                if (subUrl.startsWith("//")) {
-                    subUrl = "http:";
-                }
-                else if (!subUrl.startsWith("http://")) {
-                    subUrl = "http://www.8muses.com/" + subUrl;
-                }
-                try {
-                    logger.info("Retrieving " + subUrl);
-                    sendUpdate(STATUS.LOADING_RESOURCE, subUrl);
-                    Document subPage = Http.url(subUrl).get();
-                    // Get all images in subalbum, add to list.
-                    List<String> subalbumImages = getURLsFromPage(subPage);
-                    logger.info("Found " + subalbumImages.size() + " images in subalbum");
-                    imageURLs.addAll(subalbumImages);
-                } catch (IOException e) {
-                    logger.warn("Error while loading subalbum " + subUrl, e);
-                    continue;
+                // This if is to skip ads which don't have a href
+                if (subUrl != "") {
+                    subUrl = subUrl.replaceAll("\\.\\./", "");
+                    if (subUrl.startsWith("//")) {
+                        subUrl = "https:";
+                    }
+                    else if (!subUrl.startsWith("http://")) {
+                        subUrl = "https://www.8muses.com" + subUrl;
+                    }
+                    try {
+                        logger.info("Retrieving " + subUrl);
+                        sendUpdate(STATUS.LOADING_RESOURCE, subUrl);
+                        Document subPage = Http.url(subUrl).get();
+                        // Get all images in subalbum, add to list.
+                        List<String> subalbumImages = getURLsFromPage(subPage);
+                        String albumTitle = subPage.select("meta[name=description]").attr("content");
+                        albumTitle = albumTitle.replace("A huge collection of free porn comics for adults. Read ", "");
+                        albumTitle = albumTitle.replace(" online for free at 8muses.com", "");
+                        albumTitle = albumTitle.replace(" ", "_");
+                        // albumTitle = albumTitle.replace("Sex and Porn Comics", "");
+                        // albumTitle = albumTitle.replace("|", "");
+                        // albumTitle = albumTitle.replace("8muses", "");
+                        // albumTitle = albumTitle.replaceAll("-", "_");
+                        // albumTitle = albumTitle.replaceAll(" ", "_");
+                        // albumTitle = albumTitle.replaceAll("___", "_");
+                        // albumTitle = albumTitle.replaceAll("__", "_");
+                        // // This is here to remove the trailing __ from folder names
+                        // albumTitle = albumTitle.replaceAll("__", "");
+                        logger.info("Found " + subalbumImages.size() + " images in subalbum");
+                        int prefix = 1;
+                        for (String image : subalbumImages) {
+                            URL imageUrl = new URL(image);
+                            // urlTitles.put(imageUrl, albumTitle);
+                            addURLToDownload(imageUrl, getPrefix(prefix), albumTitle, this.url.toExternalForm(), cookies);
+                            prefix = prefix + 1;
+                        }
+                        rippingSubalbums = true;
+                        imageURLs.addAll(subalbumImages);
+                    } catch (IOException e) {
+                        logger.warn("Error while loading subalbum " + subUrl, e);
+                    }
                 }
             }
         }
@@ -142,10 +176,10 @@ public class EightmusesRipper extends AbstractHTMLRipper {
 
     private String getFullSizeImage(String imageUrl) throws IOException {
         sendUpdate(STATUS.LOADING_RESOURCE, imageUrl);
+        logger.info("Getting full sized image from " + imageUrl);
         Document doc = new Http(imageUrl).get(); // Retrieve the webpage of the image URL
-        Element fullSizeImage = doc.select(".photo").first(); // Select the "photo" element from the page (there should only be 1)
-        String path = "https://cdn.ampproject.org/i/s/www.8muses.com/data/ufu/small/" + fullSizeImage.children().select("#imageName").attr("value"); // Append the path to the fullsize image file to the standard prefix
-        return path;
+        String imageName = doc.select("input[id=imageName]").attr("value"); // Select the "input" element from the page
+        return "https://www.8muses.com/image/fm/" + imageName;
     }
 
     @Override
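The album-title derivation added above leans on the page's `meta[name=description]`, which is assumed to be fixed boilerplate wrapped around the title; the code strips the boilerplate and makes spaces filesystem-friendly. A standalone illustration of just those string operations, with a made-up description:

    // Hedged sketch of the title cleanup; the description text mirrors the diff.
    public class AlbumTitleCleanup {
        public static void main(String[] args) {
            String description = "A huge collection of free porn comics for adults. "
                    + "Read Some Album online for free at 8muses.com";
            String title = description
                    .replace("A huge collection of free porn comics for adults. Read ", "")
                    .replace(" online for free at 8muses.com", "")
                    .replace(" ", "_");
            System.out.println(title); // Some_Album
        }
    }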
@@ -9,7 +9,6 @@ import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -18,10 +17,8 @@ import org.jsoup.Connection.Response;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;
-import org.jsoup.Connection.Method;
 
 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
 import com.rarchives.ripme.utils.Http;
 
 /**
@@ -71,19 +68,13 @@ public class EroShareRipper extends AbstractHTMLRipper {
 
         Pattern p_eroshare_profile = Pattern.compile("^https?://eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$");
         Matcher m_eroshare_profile = p_eroshare_profile.matcher(url.toExternalForm());
-        if (m_eroshare_profile.matches()) {
-            return true;
-        }
-        return false;
+        return m_eroshare_profile.matches();
     }
 
-    public boolean is_profile(URL url) {
+    private boolean is_profile(URL url) {
         Pattern pa = Pattern.compile("^https?://eroshae.com/u/([a-zA-Z0-9\\-_]+)/?$");
         Matcher ma = pa.matcher(url.toExternalForm());
-        if (ma.matches()) {
-            return true;
-        }
-        return false;
+        return ma.matches();
     }
 
     @Override
@@ -103,7 +94,7 @@ public class EroShareRipper extends AbstractHTMLRipper {
 
     @Override
     public String getAlbumTitle(URL url) throws MalformedURLException {
-        if (is_profile(url) == false) {
+        if (!is_profile(url)) {
             try {
                 // Attempt to use album title as GID
                 Element titleElement = getFirstPage().select("meta[property=og:title]").first();
@@ -122,7 +113,7 @@ public class EroShareRipper extends AbstractHTMLRipper {
 
     @Override
     public List<String> getURLsFromPage(Document doc) {
-        List<String> URLs = new ArrayList<String>();
+        List<String> URLs = new ArrayList<>();
         //Pictures
         Elements imgs = doc.getElementsByTag("img");
         for (Element img : imgs) {
@@ -172,9 +163,7 @@ public class EroShareRipper extends AbstractHTMLRipper {
                 .ignoreContentType()
                 .response();
 
-        Document doc = resp.parse();
-
-        return doc;
+        return resp.parse();
     }
 
     @Override
@@ -214,7 +203,7 @@ public class EroShareRipper extends AbstractHTMLRipper {
 
         Document doc = resp.parse();
 
-        List<URL> URLs = new ArrayList<URL>();
+        List<URL> URLs = new ArrayList<>();
         //Pictures
         Elements imgs = doc.getElementsByTag("img");
         for (Element img : imgs) {
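The `canRip`/`is_profile` edits above collapse the pattern `if (m.matches()) { return true; } return false;` into `return m.matches();`, the same predicate with the branch removed. As a standalone example, with an invented class name:

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    // Hedged sketch of the refactor: return the match result directly.
    public class ProfileCheck {
        private static final Pattern PROFILE =
                Pattern.compile("^https?://eroshae.com/u/([a-zA-Z0-9\\-_]+)/?$");

        static boolean isProfile(String url) {
            Matcher ma = PROFILE.matcher(url);
            return ma.matches(); // replaces if (ma.matches()) { return true; } return false;
        }
    }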
@@ -1,15 +1,9 @@
-/*
- * To change this license header, choose License Headers in Project Properties.
- * To change this template file, choose Tools | Templates
- * and open the template in the editor.
- */
 package com.rarchives.ripme.ripper.rippers;
 
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -18,10 +12,8 @@ import org.jsoup.Connection.Response;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;
-import org.jsoup.Connection.Method;
 
 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
 import com.rarchives.ripme.utils.Http;
 
 /**
@@ -65,10 +57,15 @@ public class EromeRipper extends AbstractHTMLRipper {
         return super.getAlbumTitle(url);
     }
 
+    @Override
+    public URL sanitizeURL(URL url) throws MalformedURLException {
+        return new URL(url.toExternalForm().replaceAll("https?://erome.com", "https://www.erome.com"));
+    }
+
 
     @Override
     public List<String> getURLsFromPage(Document doc) {
-        List<String> URLs = new ArrayList<String>();
+        List<String> URLs = new ArrayList<>();
         //Pictures
         Elements imgs = doc.select("div.img > img.img-front");
         for (Element img : imgs) {
@@ -92,9 +89,7 @@ public class EromeRipper extends AbstractHTMLRipper {
                 .ignoreContentType()
                 .response();
 
-        Document doc = resp.parse();
-
-        return doc;
+        return resp.parse();
     }
 
     @Override
@@ -104,7 +99,15 @@ public class EromeRipper extends AbstractHTMLRipper {
         if (m.matches()) {
             return m.group(1);
         }
-        throw new MalformedURLException("erome album not found in " + url + ", expected https://erome.com/album");
+
+        p = Pattern.compile("^https?://erome.com/a/([a-zA-Z0-9]*)/?$");
+        m = p.matcher(url.toExternalForm());
+
+        if (m.matches()) {
+            return m.group(1);
+        }
+
+        throw new MalformedURLException("erome album not found in " + url + ", expected https://www.erome.com/album");
     }
 
     public static List<URL> getURLs(URL url) throws IOException{
@@ -115,7 +118,7 @@ public class EromeRipper extends AbstractHTMLRipper {
 
         Document doc = resp.parse();
 
-        List<URL> URLs = new ArrayList<URL>();
+        List<URL> URLs = new ArrayList<>();
         //Pictures
         Elements imgs = doc.getElementsByTag("img");
         for (Element img : imgs) {
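The `sanitizeURL` override added above normalizes bare `erome.com` links to the `www` host so that both URL shapes rip identically. A standalone illustration of the same rewrite:

    import java.net.MalformedURLException;
    import java.net.URL;

    // Hedged sketch of the host normalization; the album id is made up.
    public class UrlNormalization {
        public static void main(String[] args) throws MalformedURLException {
            URL url = new URL("http://erome.com/a/abc123");
            URL www = new URL(url.toExternalForm()
                    .replaceAll("https?://erome.com", "https://www.erome.com"));
            System.out.println(www); // https://www.erome.com/a/abc123
        }
    }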
@@ -1,101 +0,0 @@
-package com.rarchives.ripme.ripper.rippers;
-
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-
-import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.utils.Http;
-
-public class FapprovedRipper extends AbstractHTMLRipper {
-
-    private int pageIndex = 1;
-    private String username = null;
-
-    public FapprovedRipper(URL url) throws IOException {
-        super(url);
-    }
-
-    @Override
-    public String getHost() {
-        return "fapproved";
-    }
-    @Override
-    public String getDomain() {
-        return "fapproved.com";
-    }
-
-    @Override
-    public String getGID(URL url) throws MalformedURLException {
-        Pattern p = Pattern.compile("^https?://[w.]*fapproved.com/users/([a-zA-Z0-9\\-_]{3,}).*$");
-        Matcher m = p.matcher(url.toExternalForm());
-        if (m.matches()) {
-            username = m.group(1);
-            return username;
-        }
-        throw new MalformedURLException("Fapproved user not found in " + url + ", expected http://fapproved.com/users/username/images");
-    }
-
-    @Override
-    public URL sanitizeURL(URL url) throws MalformedURLException {
-        return new URL("http://fapproved.com/users/" + getGID(url));
-    }
-
-    @Override
-    public Document getFirstPage() throws IOException {
-        pageIndex = 1;
-        String pageURL = getPageURL(pageIndex);
-        return Http.url(pageURL)
-                   .ignoreContentType()
-                   .get();
-    }
-
-    @Override
-    public Document getNextPage(Document doc) throws IOException {
-        if ((doc.select("div.pagination li.next.disabled").size() != 0)
-                || (doc.select("div.pagination").size() == 0)) {
-            throw new IOException("No more pages found");
-        }
-        sleep(1000);
-        pageIndex++;
-        String pageURL = getPageURL(pageIndex);
-        return Http.url(pageURL)
-                   .ignoreContentType()
-                   .get();
-    }
-
-    private String getPageURL(int index) throws IOException {
-        if (username == null) {
-            username = getGID(this.url);
-        }
-        return "http://fapproved.com/users/" + username + "/images?page=" + pageIndex;
-    }
-
-    @Override
-    public List<String> getURLsFromPage(Document page) {
-        List<String> imageURLs = new ArrayList<String>();
-        for (Element image : page.select("div.actual-image img")) {
-            String imageURL = image.attr("src");
-            if (imageURL.startsWith("//")) {
-                imageURL = "http:" + imageURL;
-            }
-            else if (imageURL.startsWith("/")) {
-                imageURL = "http://fapproved.com" + imageURL;
-            }
-            imageURLs.add(imageURL);
-        }
-        return imageURLs;
-    }
-
-    @Override
-    public void downloadURL(URL url, int index) {
-        addURLToDownload(url, getPrefix(index));
-    }
-}
@@ -1,91 +0,0 @@
-package com.rarchives.ripme.ripper.rippers;
-
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.jsoup.HttpStatusException;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-
-import com.rarchives.ripme.ripper.AlbumRipper;
-import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
-import com.rarchives.ripme.utils.Http;
-import org.jsoup.select.Elements;
-
-public class FineboxRipper extends AlbumRipper {
-
-    private static final String DOMAIN = "finebox.co",
-                                DOMAIN_OLD = "vinebox.co",
-                                HOST = "finebox";
-
-    public FineboxRipper(URL url) throws IOException {
-        super(url);
-    }
-
-    @Override
-    public boolean canRip(URL url) {
-        return url.getHost().endsWith(DOMAIN) || url.getHost().endsWith(DOMAIN_OLD);
-    }
-
-    @Override
-    public URL sanitizeURL(URL url) throws MalformedURLException {
-        return new URL("http://"+DOMAIN+"/u/" + getGID(url));
-    }
-
-    @Override
-    public void rip() throws IOException {
-        int page = 0;
-        Document doc;
-        Boolean hasPagesLeft = true;
-        while (hasPagesLeft) {
-            page++;
-            String urlPaged = this.url.toExternalForm() + "?page=" + page;
-            logger.info("Retrieving " + urlPaged);
-            sendUpdate(STATUS.LOADING_RESOURCE, urlPaged);
-            try {
-                doc = Http.url(this.url).get();
-            } catch (HttpStatusException e) {
-                logger.debug("Hit end of pages at page " + page, e);
-                break;
-            }
-            Elements videos = doc.select("video");
-            for (Element element : videos) {
-                String videourl = element.select("source").attr("src");
-                if (!videourl.startsWith("http")) {
-                    videourl = "http://" + DOMAIN + videourl;
-                }
-                logger.info("URL to download: " + videourl);
-                if (!addURLToDownload(new URL(videourl))) {
-                    hasPagesLeft = false;
-                    break;
-                }
-            }
-            try {
-                Thread.sleep(1000);
-            } catch (InterruptedException e) {
-                logger.error("[!] Interrupted while waiting to load next page", e);
-                break;
-            }
-        }
-        waitForThreads();
-    }
-
-    @Override
-    public String getHost() {
-        return HOST;
-    }
-
-    @Override
-    public String getGID(URL url) throws MalformedURLException {
-        Pattern p = Pattern.compile("^https?://(www\\.)?(v|f)inebox\\.co/u/([a-zA-Z0-9]{1,}).*$");
-        Matcher m = p.matcher(url.toExternalForm());
-        if (!m.matches()) {
-            throw new MalformedURLException("Expected format: http://"+DOMAIN+"/u/USERNAME");
-        }
-        return m.group(m.groupCount());
-    }
-
-}
@@ -256,7 +256,7 @@ public class FivehundredpxRipper extends AbstractJSONRipper {
 
     @Override
     public List<String> getURLsFromJSON(JSONObject json) {
-        List<String> imageURLs = new ArrayList<String>();
+        List<String> imageURLs = new ArrayList<>();
         JSONArray photos = json.getJSONArray("photos");
         for (int i = 0; i < photos.length(); i++) {
             if (super.isStopped()) {
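This hunk, like many above, only swaps an explicit type argument for the diamond operator. `new ArrayList<String>()` and `new ArrayList<>()` are the same construction once the compiler infers the type from the declaration (Java 7 and later), so these edits are behavior-neutral. A quick check:

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    // Both forms produce the same runtime class; only the source is shorter.
    public class DiamondInference {
        public static void main(String[] args) {
            List<String> explicit = new ArrayList<String>();
            List<String> inferred = new ArrayList<>();
            Map<String, String> cookies = new HashMap<>(); // type inferred from the declaration
            cookies.put("nw", "1");
            System.out.println(explicit.getClass() == inferred.getClass()); // true
        }
    }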
@@ -28,9 +28,9 @@ import com.rarchives.ripme.utils.Utils;
 public class FlickrRipper extends AbstractHTMLRipper {
 
     private int page = 1;
-    private Set<String> attempted = new HashSet<String>();
+    private Set<String> attempted = new HashSet<>();
     private Document albumDoc = null;
-    private DownloadThreadPool flickrThreadPool;
+    private final DownloadThreadPool flickrThreadPool;
     @Override
     public DownloadThreadPool getThreadPool() {
         return flickrThreadPool;
@@ -162,7 +162,7 @@ public class FlickrRipper extends AbstractHTMLRipper {
 
     @Override
     public List<String> getURLsFromPage(Document page) {
-        List<String> imageURLs = new ArrayList<String>();
+        List<String> imageURLs = new ArrayList<>();
         for (Element thumb : page.select("a[data-track=photo-click]")) {
             /* TODO find a way to persist the image title
             String imageTitle = null;
@@ -215,7 +215,7 @@ public class FlickrRipper extends AbstractHTMLRipper {
                 .method(Method.GET)
                 .execute();
         Document doc = resp.parse();
-        Map<String,String> postData = new HashMap<String,String>();
+        Map<String,String> postData = new HashMap<>();
         for (Element input : doc.select("input[type=hidden]")) {
             postData.put(input.attr("name"), input.attr("value"));
         }
@@ -239,7 +239,7 @@ public class FlickrRipper extends AbstractHTMLRipper {
         private URL url;
         private int index;
 
-        public FlickrImageThread(URL url, int index) {
+        FlickrImageThread(URL url, int index) {
             super();
             this.url = url;
             this.index = index;
@@ -252,7 +252,6 @@ public class FlickrRipper extends AbstractHTMLRipper {
             Elements fullsizeImages = doc.select("div#allsizes-photo img");
             if (fullsizeImages.size() == 0) {
                 logger.error("Could not find flickr image at " + doc.location() + " - missing 'div#allsizes-photo img'");
-                return;
             }
             else {
                 String prefix = "";
@@ -28,12 +28,16 @@ import com.rarchives.ripme.utils.Http;
 
 public class FuraffinityRipper extends AbstractHTMLRipper {
 
-    static Map<String, String> cookies=null;
-    static final String urlBase = "https://www.furaffinity.net";
+    private static final String urlBase = "https://www.furaffinity.net";
+    private static Map<String,String> cookies = new HashMap<>();
+    static {
+        cookies.put("b", "bd5ccac8-51dc-4265-8ae1-7eac685ad667");
+        cookies.put("a", "7c41b782-d01d-4b0e-b45b-62a4f0b2a369");
+    }
 
     // Thread pool for finding direct image links from "image" pages (html)
     private DownloadThreadPool furaffinityThreadPool
             = new DownloadThreadPool( "furaffinity");
 
     @Override
     public DownloadThreadPool getThreadPool() {
@@ -55,73 +59,49 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
     }
     @Override
     public boolean hasDescriptionSupport() {
-        return true;
+        return false;
     }
     @Override
     public Document getFirstPage() throws IOException {
-        if (cookies == null || cookies.size() == 0) {
-            login();
-        }
-
         return Http.url(url).cookies(cookies).get();
     }
 
-    private void login() throws IOException {
-        String user = new String(Base64.decode("cmlwbWU="));
-        String pass = new String(Base64.decode("cmlwbWVwYXNzd29yZA=="));
-
-        Response loginPage = Http.url(urlBase + "/login/")
-                .referrer(urlBase)
-                .response();
-        cookies = loginPage.cookies();
-
-        Map<String,String> formData = new HashMap<String,String>();
-        formData.put("action", "login");
-        formData.put("retard_protection", "1");
-        formData.put("name", user);
-        formData.put("pass", pass);
-        formData.put("login", "Login to FurAffinity");
-
-        Response doLogin = Http.url(urlBase + "/login/?ref=" + url)
-                .referrer(urlBase + "/login/")
-                .cookies(cookies)
-                .data(formData)
-                .method(Method.POST)
-                .response();
-        cookies.putAll(doLogin.cookies());
-    }
-
     @Override
     public Document getNextPage(Document doc) throws IOException {
         // Find next page
-        Elements nextPageUrl = doc.select("td[align=right] form");
+        Elements nextPageUrl = doc.select("a.right");
         if (nextPageUrl.size() == 0) {
             throw new IOException("No more pages");
         }
-        String nextUrl = urlBase + nextPageUrl.first().attr("action");
+        String nextUrl = urlBase + nextPageUrl.first().attr("href");
 
         sleep(500);
         Document nextPage = Http.url(nextUrl).cookies(cookies).get();
 
-        Elements hrefs = nextPage.select("div#no-images");
-        if (hrefs.size() != 0) {
-            throw new IOException("No more pages");
-        }
         return nextPage;
     }
 
+    private String getImageFromPost(String url) {
+        try {
+            logger.info("found url " + Http.url(url).cookies(cookies).get().select("meta[property=og:image]").attr("content"));
+            return Http.url(url).cookies(cookies).get().select("meta[property=og:image]").attr("content");
+        } catch (IOException e) {
+            return "";
+        }
+    }
+
     @Override
     public List<String> getURLsFromPage(Document page) {
-        List<String> urls = new ArrayList<String>();
+        List<String> urls = new ArrayList<>();
         Elements urlElements = page.select("figure.t-image > b > u > a");
         for (Element e : urlElements) {
-            urls.add(urlBase + e.select("a").first().attr("href"));
+            urls.add(getImageFromPost(urlBase + e.select("a").first().attr("href")));
        }
         return urls;
     }
     @Override
     public List<String> getDescriptionsFromPage(Document page) {
-        List<String> urls = new ArrayList<String>();
+        List<String> urls = new ArrayList<>();
         Elements urlElements = page.select("figure.t-image > b > u > a");
for (Element e : urlElements) {
|
for (Element e : urlElements) {
|
||||||
urls.add(urlBase + e.select("a").first().attr("href"));
|
urls.add(urlBase + e.select("a").first().attr("href"));
|
||||||
@ -138,7 +118,6 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
|
|||||||
// Fetch the image page
|
// Fetch the image page
|
||||||
Response resp = Http.url(page)
|
Response resp = Http.url(page)
|
||||||
.referrer(this.url)
|
.referrer(this.url)
|
||||||
.cookies(cookies)
|
|
||||||
.response();
|
.response();
|
||||||
cookies.putAll(resp.cookies());
|
cookies.putAll(resp.cookies());
|
||||||
|
|
||||||
@ -157,9 +136,7 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
|
|||||||
ele.select("p").prepend("\\n\\n");
|
ele.select("p").prepend("\\n\\n");
|
||||||
logger.debug("Returning description at " + page);
|
logger.debug("Returning description at " + page);
|
||||||
String tempPage = Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
|
String tempPage = Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
|
||||||
String title = documentz.select("meta[property=og:title]").attr("content");
|
return documentz.select("meta[property=og:title]").attr("content") + "\n" + tempPage; // Overridden saveText takes first line and makes it the file name.
|
||||||
String tempText = title;
|
|
||||||
return tempText + "\n" + tempPage; // Overridden saveText takes first line and makes it the file name.
|
|
||||||
} catch (IOException ioe) {
|
} catch (IOException ioe) {
|
||||||
logger.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
|
logger.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
|
||||||
return null;
|
return null;
|
||||||
@ -167,8 +144,8 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
|
|||||||
}
|
}
|
||||||
@Override
|
@Override
|
||||||
public boolean saveText(URL url, String subdirectory, String text, int index) {
|
public boolean saveText(URL url, String subdirectory, String text, int index) {
|
||||||
//TODO Make this better please?
|
//TODO Make this better please?
|
||||||
try {
|
try {
|
||||||
stopCheck();
|
stopCheck();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
return false;
|
return false;
|
||||||
@ -179,7 +156,7 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
|
|||||||
saveAs = text.split("\n")[0];
|
saveAs = text.split("\n")[0];
|
||||||
saveAs = saveAs.replaceAll("^(\\S+)\\s+by\\s+(.*)$", "$2_$1");
|
saveAs = saveAs.replaceAll("^(\\S+)\\s+by\\s+(.*)$", "$2_$1");
|
||||||
for (int i = 1;i < text.split("\n").length; i++) {
|
for (int i = 1;i < text.split("\n").length; i++) {
|
||||||
newText = newText.replace("\\","").replace("/","").replace("~","") + "\n" + text.split("\n")[i];
|
newText = newText.replace("\\","").replace("/","").replace("~","") + "\n" + text.split("\n")[i];
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
if (!subdirectory.equals("")) {
|
if (!subdirectory.equals("")) {
|
||||||
@ -212,8 +189,7 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
|
|||||||
}
|
}
|
||||||
@Override
|
@Override
|
||||||
public void downloadURL(URL url, int index) {
|
public void downloadURL(URL url, int index) {
|
||||||
furaffinityThreadPool.addThread(new FuraffinityDocumentThread(url));
|
addURLToDownload(url, getPrefix(index));
|
||||||
sleep(250);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -224,6 +200,7 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
|
|||||||
if (m.matches()) {
|
if (m.matches()) {
|
||||||
return m.group(1);
|
return m.group(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
throw new MalformedURLException("Expected furaffinity.net URL format: "
|
throw new MalformedURLException("Expected furaffinity.net URL format: "
|
||||||
+ "www.furaffinity.net/gallery/username - got " + url
|
+ "www.furaffinity.net/gallery/username - got " + url
|
||||||
+ " instead");
|
+ " instead");
|
||||||
@ -232,42 +209,13 @@ public class FuraffinityRipper extends AbstractHTMLRipper {
|
|||||||
private class FuraffinityDocumentThread extends Thread {
|
private class FuraffinityDocumentThread extends Thread {
|
||||||
private URL url;
|
private URL url;
|
||||||
|
|
||||||
public FuraffinityDocumentThread(URL url) {
|
FuraffinityDocumentThread(URL url) {
|
||||||
super();
|
super();
|
||||||
this.url = url;
|
this.url = url;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void run() {
|
|
||||||
try {
|
|
||||||
Document doc = Http.url(url).cookies(cookies).get();
|
|
||||||
// Find image
|
|
||||||
Elements donwloadLink = doc.select("div.alt1 b a[href^=//d.facdn.net/]");
|
|
||||||
if (donwloadLink.size() == 0) {
|
|
||||||
logger.warn("Could not download " + this.url);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
String link = "http:" + donwloadLink.first().attr("href");
|
|
||||||
logger.info("Found URL " + link);
|
|
||||||
String[] fileNameSplit = link.split("/");
|
|
||||||
String fileName = fileNameSplit[fileNameSplit.length -1];
|
|
||||||
fileName = fileName.replaceAll("[0-9]*\\.", "");
|
|
||||||
String[] fileExtSplit = link.split("\\.");
|
|
||||||
String fileExt = fileExtSplit[fileExtSplit.length -1];
|
|
||||||
fileName = fileName.replaceAll(fileExt, "");
|
|
||||||
File saveAS;
|
|
||||||
fileName = fileName.replace("[0-9]*\\.", "");
|
|
||||||
saveAS = new File(
|
|
||||||
workingDir.getCanonicalPath()
|
|
||||||
+ File.separator
|
|
||||||
+ fileName
|
|
||||||
+ "."
|
|
||||||
+ fileExt);
|
|
||||||
addURLToDownload(new URL(link),saveAS,"",cookies);
|
|
||||||
} catch (IOException e) {
|
|
||||||
logger.error("[!] Exception while loading/parsing " + this.url, e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
|
||||||
|
}
|
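FuraffinityRipper now skips the login flow and the per-post FuraffinityDocumentThread entirely: getURLsFromPage() resolves each submission to a direct image URL by reading the post's og:image meta tag, and downloadURL() just queues the result. A minimal sketch of that lookup (the post URL below is a placeholder, and a real call would also pass the session cookies the ripper now hardcodes):

```java
import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

public class OgImageLookup {
    public static void main(String[] args) throws IOException {
        // Any page that advertises its full-size media in an Open Graph meta tag.
        Document doc = Jsoup.connect("https://www.furaffinity.net/view/12345/").get();
        // <meta property="og:image" content="..."> carries the direct image URL.
        String imageUrl = doc.select("meta[property=og:image]").attr("content");
        System.out.println(imageUrl.isEmpty() ? "no og:image tag found" : imageUrl);
    }
}
```

One thing worth noting about the committed getImageFromPost(): it fetches the same page twice, once for the log line and once for the return value; caching the Document would halve the requests per post.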
src/main/java/com/rarchives/ripme/ripper/rippers/FuskatorRipper.java
@@ -60,7 +60,7 @@ public class FuskatorRipper extends AbstractHTMLRipper {
 
 @Override
 public List<String> getURLsFromPage(Document doc) {
-List<String> imageURLs = new ArrayList<String>();
+List<String> imageURLs = new ArrayList<>();
 String html = doc.html();
 // Get "baseUrl"
 String baseUrl = Utils.between(html, "unescape('", "'").get(0);

src/main/java/com/rarchives/ripme/ripper/rippers/GifyoRipper.java
@@ -1,113 +0,0 @@
-package com.rarchives.ripme.ripper.rippers;
-
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.jsoup.Connection.Method;
-import org.jsoup.Connection.Response;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-
-import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
-import com.rarchives.ripme.utils.Http;
-
-public class GifyoRipper extends AbstractHTMLRipper {
-
-private int page = 0;
-private Map<String,String> cookies = new HashMap<String,String>();
-
-public GifyoRipper(URL url) throws IOException {
-super(url);
-}
-
-@Override
-public String getHost() {
-return "gifyo";
-}
-@Override
-public String getDomain() {
-return "gifyo.com";
-}
-
-@Override
-public String getGID(URL url) throws MalformedURLException {
-Pattern p = Pattern.compile("^https?://[w.]*gifyo.com/([a-zA-Z0-9\\-_]+)/?$");
-Matcher m = p.matcher(url.toExternalForm());
-if (m.matches()) {
-return m.group(1);
-}
-throw new MalformedURLException("Gifyo user not found in " + url + ", expected http://gifyo.com/username");
-}
-
-@Override
-public URL sanitizeURL(URL url) throws MalformedURLException {
-return new URL("http://gifyo.com/" + getGID(url) + "/");
-}
-
-@Override
-public Document getFirstPage() throws IOException {
-Response resp = Http.url(this.url)
-.ignoreContentType()
-.response();
-cookies = resp.cookies();
-
-Document doc = resp.parse();
-if (doc.html().contains("profile is private")) {
-sendUpdate(STATUS.RIP_ERRORED, "User has private profile");
-throw new IOException("User has private profile");
-}
-return doc;
-}
-
-@Override
-public Document getNextPage(Document doc) throws IOException {
-page++;
-Map<String,String> postData = new HashMap<String,String>();
-postData.put("cmd", "refreshData");
-postData.put("view", "gif");
-postData.put("layout", "grid");
-postData.put("page", Integer.toString(page));
-Response resp = Http.url(this.url)
-.ignoreContentType()
-.data(postData)
-.cookies(cookies)
-.method(Method.POST)
-.response();
-cookies.putAll(resp.cookies());
-Document nextDoc = resp.parse();
-if (nextDoc.select("div.gif img").size() == 0) {
-throw new IOException("No more images found");
-}
-sleep(2000);
-return nextDoc;
-}
-
-@Override
-public List<String> getURLsFromPage(Document doc) {
-List<String> imageURLs = new ArrayList<String>();
-for (Element image : doc.select("img.profile_gif")) {
-String imageUrl = image.attr("data-animated");
-if (imageUrl.startsWith("//")) {
-imageUrl = "http:" + imageUrl;
-}
-imageUrl = imageUrl.replace("/medium/", "/large/");
-imageUrl = imageUrl.replace("_s.gif", ".gif");
-imageURLs.add(imageUrl);
-}
-logger.debug("Found " + imageURLs.size() + " images");
-return imageURLs;
-}
-
-@Override
-public void downloadURL(URL url, int index) {
-addURLToDownload(url);
-}
-}

src/main/java/com/rarchives/ripme/ripper/rippers/GirlsOfDesireRipper.java
@@ -50,7 +50,7 @@ public class GirlsOfDesireRipper extends AbstractHTMLRipper {
 Pattern p;
 Matcher m;
 
-p = Pattern.compile("^www\\.girlsofdesire\\.org\\/galleries\\/([\\w\\d-]+)\\/$");
+p = Pattern.compile("^www\\.girlsofdesire\\.org/galleries/([\\w\\d-]+)/$");
 m = p.matcher(url.toExternalForm());
 if (m.matches()) {
 return m.group(1);
@@ -72,7 +72,7 @@ public class GirlsOfDesireRipper extends AbstractHTMLRipper {
 
 @Override
 public List<String> getURLsFromPage(Document doc) {
-List<String> imageURLs = new ArrayList<String>();
+List<String> imageURLs = new ArrayList<>();
 for (Element thumb : doc.select("td.vtop > a > img")) {
 String imgSrc = thumb.attr("src");
 imgSrc = imgSrc.replaceAll("_thumb\\.", ".");
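The regex cleanups here (and in ImagearnRipper and ImagefapRipper further down) are behavior-preserving: {1,} is the same quantifier as +, and forward slashes never need escaping in Java regular expressions. A quick check against a made-up gallery URL:

```java
public class RegexEquivalence {
    public static void main(String[] args) {
        String url = "www.girlsofdesire.org/galleries/some-gallery/";
        // Old pattern, with escaped slashes; new pattern, without them.
        boolean before = url.matches("^www\\.girlsofdesire\\.org\\/galleries\\/([\\w\\d-]+)\\/$");
        boolean after = url.matches("^www\\.girlsofdesire\\.org/galleries/([\\w\\d-]+)/$");
        System.out.println(before + " " + after); // prints: true true
    }
}
```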
src/main/java/com/rarchives/ripme/ripper/rippers/HbrowseRipper.java
@@ -0,0 +1,78 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+
+public class HbrowseRipper extends AbstractHTMLRipper {
+
+public HbrowseRipper(URL url) throws IOException {
+super(url);
+}
+
+@Override
+public String getHost() {
+return "hbrowse";
+}
+
+@Override
+public String getDomain() {
+return "hbrowse.com";
+}
+
+@Override
+public String getGID(URL url) throws MalformedURLException {
+Pattern p = Pattern.compile("http://www.hbrowse.com/(\\d+)/[a-zA-Z0-9]*");
+Matcher m = p.matcher(url.toExternalForm());
+if (m.matches()) {
+return m.group(1);
+}
+throw new MalformedURLException("Expected hbrowse.com URL format: " +
+"hbrowse.com/ID/COMICID - got " + url + " instead");
+}
+
+@Override
+public Document getFirstPage() throws IOException {
+// "url" is an instance field of the superclass
+Document tempDoc = Http.url(url).get();
+return Http.url(tempDoc.select("td[id=pageTopHome] > a[title=view thumbnails (top)]").attr("href")).get();
+}
+
+@Override
+public String getAlbumTitle(URL url) throws MalformedURLException {
+try {
+Document doc = getFirstPage();
+String title = doc.select("div[id=main] > table.listTable > tbody > tr > td.listLong").first().text();
+return getHost() + "_" + title + "_" + getGID(url);
+} catch (Exception e) {
+// Fall back to default album naming convention
+logger.warn("Failed to get album title from " + url, e);
+}
+return super.getAlbumTitle(url);
+}
+
+@Override
+public List<String> getURLsFromPage(Document doc) {
+List<String> result = new ArrayList<String>();
+for (Element el : doc.select("table > tbody > tr > td > a > img")) {
+String imageURL = el.attr("src").replace("/zzz", "");
+result.add(imageURL);
+}
+return result;
+}
+
+@Override
+public void downloadURL(URL url, int index) {
+addURLToDownload(url, getPrefix(index));
+}
+}
src/main/java/com/rarchives/ripme/ripper/rippers/Hentai2readRipper.java
@@ -0,0 +1,113 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+
+public class Hentai2readRipper extends AbstractHTMLRipper {
+String lastPage;
+
+public Hentai2readRipper(URL url) throws IOException {
+super(url);
+}
+
+@Override
+public String getHost() {
+return "hentai2read";
+}
+
+@Override
+public String getDomain() {
+return "hentai2read.com";
+}
+
+@Override
+public String getGID(URL url) throws MalformedURLException {
+Pattern p = Pattern.compile("https://hentai2read\\.com/([a-zA-Z0-9_-]*)/?");
+Matcher m = p.matcher(url.toExternalForm());
+if (m.matches()) {
+return m.group(1);
+}
+throw new MalformedURLException("Expected hentai2read.com URL format: " +
+"hbrowse.com/COMICID - got " + url + " instead");
+}
+
+@Override
+public Document getFirstPage() throws IOException {
+Document tempDoc;
+// get the first page of the comic
+if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) {
+tempDoc = Http.url(url + "1").get();
+} else {
+tempDoc = Http.url(url + "/1").get();
+}
+for (Element el : tempDoc.select("ul.nav > li > a")) {
+if (el.attr("href").startsWith("https://hentai2read.com/thumbnails/")) {
+// Get the page with the thumbnails
+return Http.url(el.attr("href")).get();
+}
+}
+throw new IOException("Unable to get first page");
+}
+
+@Override
+public String getAlbumTitle(URL url) throws MalformedURLException {
+try {
+Document doc = getFirstPage();
+String title = doc.select("span[itemprop=title]").text();
+return getHost() + "_" + title;
+} catch (Exception e) {
+// Fall back to default album naming convention
+logger.warn("Failed to get album title from " + url, e);
+}
+return super.getAlbumTitle(url);
+}
+
+@Override
+public List<String> getURLsFromPage(Document doc) {
+List<String> result = new ArrayList<String>();
+for (Element el : doc.select("div.block-content > div > div.img-container > a > img.img-responsive")) {
+String imageURL = "https:" + el.attr("src");
+imageURL = imageURL.replace("hentaicdn.com", "static.hentaicdn.com");
+imageURL = imageURL.replace("thumbnails/", "");
+imageURL = imageURL.replace("tmb", "");
+result.add(imageURL);
+}
+return result;
+}
+
+@Override
+public Document getNextPage(Document doc) throws IOException {
+// Find next page
+String nextUrl = "";
+Element elem = doc.select("div.bg-white > ul.pagination > li > a").last();
+if (elem == null) {
+throw new IOException("No more pages");
+}
+nextUrl = elem.attr("href");
+// We use the global lastPage to check if we've already ripped this page
+// and is so we quit as there are no more pages
+if (nextUrl.equals(lastPage)) {
+throw new IOException("No more pages");
+}
+lastPage = nextUrl;
+// Sleep for half a sec to avoid getting IP banned
+sleep(500);
+return Http.url(nextUrl).get();
+}
+
+@Override
+public void downloadURL(URL url, int index) {
+addURLToDownload(url, getPrefix(index));
+}
+}
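Hentai2readRipper's getNextPage() guards against pagination that loops by remembering the last "next" link it scraped and stopping when the newly scraped link repeats it. The same guard in a standalone loop, under the assumption that the start URL below is illustrative only:

```java
import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

public class LastPageGuard {
    public static void main(String[] args) throws IOException {
        String nextUrl = "https://hentai2read.com/example/1/"; // placeholder start page
        String lastPage = null;
        while (true) {
            Document page = Jsoup.connect(nextUrl).get();
            System.out.println("ripping " + nextUrl);
            Element next = page.select("div.bg-white > ul.pagination > li > a").last();
            if (next == null) {
                break; // no pagination link at all
            }
            String href = next.attr("href");
            // Stop when "next" repeats the link we followed last time:
            // the site has run out of pages and is looping.
            if (href.equals(lastPage)) {
                break;
            }
            lastPage = href;
            nextUrl = href;
        }
    }
}
```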
src/main/java/com/rarchives/ripme/ripper/rippers/HentaiCafeRipper.java
@@ -0,0 +1,73 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jsoup.nodes.Document;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+
+public class HentaiCafeRipper extends AbstractHTMLRipper {
+
+public HentaiCafeRipper(URL url) throws IOException {
+super(url);
+}
+
+@Override
+public String getHost() {
+return "hentai";
+}
+
+@Override
+public String getDomain() {
+return "hentai.cafe";
+}
+
+@Override
+public String getGID(URL url) throws MalformedURLException {
+Pattern p = Pattern.compile("https?://hentai\\.cafe/([a-zA-Z0-9_\\-%]*)/?$");
+Matcher m = p.matcher(url.toExternalForm());
+if (m.matches()) {
+return m.group(1);
+}
+throw new MalformedURLException("Expected hentai.cafe URL format: " +
+"hentai.cafe/COMIC - got " + url + " instead");
+}
+
+@Override
+public Document getFirstPage() throws IOException {
+// "url" is an instance field of the superclass
+Document tempDoc = Http.url(url).get();
+return Http.url(tempDoc.select("div.last > p > a.x-btn").attr("href")).get();
+}
+
+@Override
+public Document getNextPage(Document doc) throws IOException {
+String nextPageURL = doc.select("div[id=page] > div.inner > a").attr("href");
+int totalPages = Integer.parseInt(doc.select("div.panel > div.topbar > div > div.topbar_right > div.tbtitle > div.text").text().replace(" ⤵", ""));
+String[] nextPageURLSplite = nextPageURL.split("/");
+// This checks if the next page number is greater than the total number of pages
+if (totalPages >= Integer.parseInt(nextPageURLSplite[nextPageURLSplite.length -1])) {
+return Http.url(nextPageURL).get();
+}
+throw new IOException("No more pages");
+}
+
+@Override
+public List<String> getURLsFromPage(Document doc) {
+List<String> result = new ArrayList<>();
+result.add(doc.select("div[id=page] > div.inner > a > img.open").attr("src"));
+return result;
+}
+
+@Override
+public void downloadURL(URL url, int index) {
+addURLToDownload(url, getPrefix(index));
+}
+}

src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java
@@ -20,7 +20,7 @@ import com.rarchives.ripme.utils.Http;
 
 public class HentaifoundryRipper extends AbstractHTMLRipper {
 
-private Map<String,String> cookies = new HashMap<String,String>();
+private Map<String,String> cookies = new HashMap<>();
 public HentaifoundryRipper(URL url) throws IOException {
 super(url);
 }
@@ -84,7 +84,7 @@ public class HentaifoundryRipper extends AbstractHTMLRipper {
 
 @Override
 public List<String> getURLsFromPage(Document doc) {
-List<String> imageURLs = new ArrayList<String>();
+List<String> imageURLs = new ArrayList<>();
 Pattern imgRegex = Pattern.compile(".*/user/([a-zA-Z0-9\\-_]+)/(\\d+)/.*");
 for (Element thumb : doc.select("div.thumb_square > a.thumbLink")) {
 if (isStopped()) {
@@ -115,7 +115,7 @@ public class HentaifoundryRipper extends AbstractHTMLRipper {
 imagePage = null;
 }
 // This is here for when the image is resized to a thumbnail because ripme doesn't report a screensize
-if (imagePage.select("div.boxbody > img.center").attr("src").contains("thumbs.") == true) {
+if (imagePage.select("div.boxbody > img.center").attr("src").contains("thumbs.")) {
 imageURLs.add("http:" + imagePage.select("div.boxbody > img.center").attr("onclick").replace("this.src=", "").replace("'", "").replace("; $(#resize_message).hide();", ""));
 }
 else {

src/main/java/com/rarchives/ripme/ripper/rippers/ImagearnRipper.java
@@ -31,7 +31,7 @@ public class ImagearnRipper extends AbstractHTMLRipper {
 
 @Override
 public String getGID(URL url) throws MalformedURLException {
-Pattern p = Pattern.compile("^.*imagearn.com/{1,}gallery.php\\?id=([0-9]{1,}).*$");
+Pattern p = Pattern.compile("^.*imagearn.com/+gallery.php\\?id=([0-9]+).*$");
 Matcher m = p.matcher(url.toExternalForm());
 if (m.matches()) {
 return m.group(1);
@@ -43,7 +43,7 @@ public class ImagearnRipper extends AbstractHTMLRipper {
 }
 
 public URL sanitizeURL(URL url) throws MalformedURLException {
-Pattern p = Pattern.compile("^.*imagearn.com/{1,}image.php\\?id=[0-9]{1,}.*$");
+Pattern p = Pattern.compile("^.*imagearn.com/+image.php\\?id=[0-9]+.*$");
 Matcher m = p.matcher(url.toExternalForm());
 if (m.matches()) {
 // URL points to imagearn *image*, not gallery
@@ -75,9 +75,22 @@ public class ImagearnRipper extends AbstractHTMLRipper {
 return Http.url(url).get();
 }
 
+@Override
+public String getAlbumTitle(URL url) throws MalformedURLException {
+try {
+Document doc = getFirstPage();
+String title = doc.select("h3 > strong").first().text(); // profile name
+return getHost() + "_" + title + "_" + getGID(url);
+} catch (Exception e) {
+// Fall back to default album naming convention
+logger.warn("Failed to get album title from " + url, e);
+}
+return super.getAlbumTitle(url);
+}
+
 @Override
 public List<String> getURLsFromPage(Document doc) {
-List<String> imageURLs = new ArrayList<String>();
+List<String> imageURLs = new ArrayList<>();
 for (Element thumb : doc.select("div#gallery > div > a")) {
 String imageURL = thumb.attr("href");
 try {
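Several rippers gain the same getAlbumTitle() override in this commit (HbrowseRipper, Hentai2readRipper, and ImagearnRipper above): scrape a human-readable title, and fall back to the superclass' default naming on any failure. The Imagearn variant, reproduced from the hunk above; only the CSS selector differs between sites:

```java
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
    try {
        Document doc = getFirstPage();
        String title = doc.select("h3 > strong").first().text(); // profile name
        return getHost() + "_" + title + "_" + getGID(url);
    } catch (Exception e) {
        // Fall back to default album naming convention
        logger.warn("Failed to get album title from " + url, e);
    }
    return super.getAlbumTitle(url);
}
```

Catching Exception rather than IOException is deliberate here: first() returns null when the selector misses, and the resulting NullPointerException should also trigger the fallback.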
src/main/java/com/rarchives/ripme/ripper/rippers/ImagebamRipper.java
@@ -81,7 +81,7 @@ public class ImagebamRipper extends AbstractHTMLRipper {
 
 @Override
 public List<String> getURLsFromPage(Document doc) {
-List<String> imageURLs = new ArrayList<String>();
+List<String> imageURLs = new ArrayList<>();
 for (Element thumb : doc.select("div > a[target=_blank]:not(.footera)")) {
 imageURLs.add(thumb.attr("href"));
 }
@@ -124,7 +124,7 @@ public class ImagebamRipper extends AbstractHTMLRipper {
 private URL url;
 private int index;
 
-public ImagebamImageThread(URL url, int index) {
+ImagebamImageThread(URL url, int index) {
 super();
 this.url = url;
 this.index = index;

src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java
@@ -120,7 +120,7 @@ public class ImagefapRipper extends AbstractHTMLRipper {
 
 @Override
 public List<String> getURLsFromPage(Document doc) {
-List<String> imageURLs = new ArrayList<String>();
+List<String> imageURLs = new ArrayList<>();
 for (Element thumb : doc.select("#gallery img")) {
 if (!thumb.hasAttr("src") || !thumb.hasAttr("width")) {
 continue;
@@ -129,7 +129,7 @@ public class ImagefapRipper extends AbstractHTMLRipper {
 image = image.replaceAll(
 "http://x.*.fap.to/images/thumb/",
 "http://fap.to/images/full/");
-image = image.replaceAll("w[0-9]{1,}-h[0-9]{1,}/", "");
+image = image.replaceAll("w[0-9]+-h[0-9]+/", "");
 imageURLs.add(image);
 if (isThisATest()) {
 break;
@@ -152,7 +152,7 @@ public class ImagefapRipper extends AbstractHTMLRipper {
 Pattern p = Pattern.compile("^Porn pics of (.*) \\(Page 1\\)$");
 Matcher m = p.matcher(title);
 if (m.matches()) {
-return getHost() + "_" + m.group(1);
+return getHost() + "_" + m.group(1) + "_" + getGID(url);
 }
 } catch (IOException e) {
 // Fall back to default album naming convention
@@ -160,4 +160,4 @@ public class ImagefapRipper extends AbstractHTMLRipper {
 return super.getAlbumTitle(url);
 }
 
 }

src/main/java/com/rarchives/ripme/ripper/rippers/ImagestashRipper.java
@@ -1,87 +0,0 @@
-package com.rarchives.ripme.ripper.rippers;
-
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.json.JSONArray;
-import org.json.JSONObject;
-
-import com.rarchives.ripme.ripper.AbstractJSONRipper;
-import com.rarchives.ripme.utils.Http;
-
-public class ImagestashRipper extends AbstractJSONRipper {
-
-private int page = 1;
-
-public ImagestashRipper(URL url) throws IOException {
-super(url);
-}
-
-@Override
-public String getHost() {
-return "imagestash";
-}
-@Override
-public String getDomain() {
-return "imagestash.org";
-}
-
-@Override
-public String getGID(URL url) throws MalformedURLException {
-Pattern p = Pattern.compile("^.*imagestash.org/tag/([a-zA-Z0-9\\-_]+)$");
-Matcher m = p.matcher(url.toExternalForm());
-if (m.matches()) {
-return m.group(1);
-}
-throw new MalformedURLException(
-"Expected imagestash.org tag formats: "
-+ "imagestash.org/tag/tagname"
-+ " Got: " + url);
-}
-
-@Override
-public JSONObject getFirstPage() throws IOException {
-String baseURL = "https://imagestash.org/images?tags="
-+ getGID(url)
-+ "&page=" + page;
-return Http.url(baseURL).getJSON();
-}
-
-@Override
-public JSONObject getNextPage(JSONObject json) throws IOException {
-int count = json.getInt("count"),
-offset = json.getInt("offset"),
-total = json.getInt("total");
-if (count + offset >= total || json.getJSONArray("images").length() == 0) {
-throw new IOException("No more images");
-}
-sleep(1000);
-page++;
-return getFirstPage();
-}
-
-@Override
-public List<String> getURLsFromJSON(JSONObject json) {
-List<String> imageURLs = new ArrayList<String>();
-JSONArray images = json.getJSONArray("images");
-for (int i = 0; i < images.length(); i++) {
-JSONObject image = images.getJSONObject(i);
-String imageURL = image.getString("src");
-if (imageURL.startsWith("/")) {
-imageURL = "https://imagestash.org" + imageURL;
-}
-imageURLs.add(imageURL);
-}
-return imageURLs;
-}
-
-@Override
-public void downloadURL(URL url, int index) {
-addURLToDownload(url, getPrefix(index));
-}
-}

src/main/java/com/rarchives/ripme/ripper/rippers/ImagevenueRipper.java
@@ -62,7 +62,7 @@ public class ImagevenueRipper extends AbstractHTMLRipper {
 }
 
 public List<String> getURLsFromPage(Document doc) {
-List<String> imageURLs = new ArrayList<String>();
+List<String> imageURLs = new ArrayList<>();
 for (Element thumb : doc.select("a[target=_blank]")) {
 imageURLs.add(thumb.attr("href"));
 }
@@ -83,7 +83,7 @@ public class ImagevenueRipper extends AbstractHTMLRipper {
 private URL url;
 private int index;
 
-public ImagevenueImageThread(URL url, int index) {
+ImagevenueImageThread(URL url, int index) {
 super();
 this.url = url;
 this.index = index;

src/main/java/com/rarchives/ripme/ripper/rippers/ImgboxRipper.java
@@ -46,11 +46,11 @@ public class ImgboxRipper extends AbstractHTMLRipper {
 }
 @Override
 public List<String> getURLsFromPage(Document doc) {
-List<String> imageURLs = new ArrayList<String>();
+List<String> imageURLs = new ArrayList<>();
 for (Element thumb : doc.select("div.boxed-content > a > img")) {
-String image = thumb.attr("src")
-.replaceAll("[-a-zA-Z0-9.]+s.imgbox.com",
-"i.imgbox.com");
+String image = thumb.attr("src").replaceAll("thumbs", "images");
+image = image.replace("_b", "_o");
+image = image.replaceAll("\\d-s", "i");
 imageURLs.add(image);
 }
 return imageURLs;
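ImgboxRipper now derives the original image URL from the thumbnail with three string rewrites instead of rewriting the subdomain alone. Applied to a hypothetical thumbnail URL:

```java
public class ImgboxUrlRewrite {
    public static void main(String[] args) {
        String image = "https://3-s.imgbox.com/thumbs/abcd1234_b.jpg"; // made-up thumbnail
        image = image.replaceAll("thumbs", "images"); // thumbnail path -> image path
        image = image.replace("_b", "_o");            // thumb suffix -> original suffix
        image = image.replaceAll("\\d-s", "i");       // numbered thumb host -> i.imgbox.com
        System.out.println(image); // https://i.imgbox.com/images/abcd1234_o.jpg
    }
}
```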
src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java
@@ -31,14 +31,15 @@ public class ImgurRipper extends AlbumRipper {
 
 private Document albumDoc;
 
-static enum ALBUM_TYPE {
+enum ALBUM_TYPE {
 ALBUM,
 USER,
 USER_ALBUM,
 USER_IMAGES,
 SERIES_OF_IMAGES,
 SUBREDDIT
-};
+}
 
 private ALBUM_TYPE albumType;
 
 public ImgurRipper(URL url) throws IOException {
@@ -104,7 +105,8 @@ public class ImgurRipper extends AlbumRipper {
 */
 
 String title = null;
-final String defaultTitle = "Imgur: The most awesome images on the Internet";
+final String defaultTitle1 = "Imgur: The most awesome images on the Internet";
+final String defaultTitle2 = "Imgur: The magic of the Internet";
 logger.info("Trying to get album title");
 elems = albumDoc.select("meta[property=og:title]");
 if (elems != null) {
@@ -113,7 +115,7 @@ public class ImgurRipper extends AlbumRipper {
 }
 // This is here encase the album is unnamed, to prevent
 // Imgur: The most awesome images on the Internet from being added onto the album name
-if (title.contains(defaultTitle)) {
+if (title.contains(defaultTitle1) || title.contains(defaultTitle2)) {
 logger.debug("Album is untitled or imgur is returning the default title");
 // We set the title to "" here because if it's found in the next few attempts it will be changed
 // but if it's nto found there will be no reason to set it later
@@ -121,7 +123,7 @@ public class ImgurRipper extends AlbumRipper {
 logger.debug("Trying to use title tag to get title");
 elems = albumDoc.select("title");
 if (elems != null) {
-if (elems.text().contains(defaultTitle)) {
+if (elems.text().contains(defaultTitle1) || elems.text().contains(defaultTitle2)) {
 logger.debug("Was unable to get album title or album was untitled");
 }
 else {
@@ -223,7 +225,7 @@ public class ImgurRipper extends AlbumRipper {
 String[] imageIds = m.group(1).split(",");
 for (String imageId : imageIds) {
 // TODO: Fetch image with ID imageId
-logger.debug("Fetching image info for ID " + imageId);;
+logger.debug("Fetching image info for ID " + imageId);
 try {
 JSONObject json = Http.url("https://api.imgur.com/2/image/" + imageId + ".json").getJSON();
 if (!json.has("image")) {
@@ -350,7 +352,6 @@ public class ImgurRipper extends AlbumRipper {
 Thread.sleep(SLEEP_BETWEEN_ALBUMS * 1000);
 } catch (Exception e) {
 logger.error("Error while ripping album: " + e.getMessage(), e);
-continue;
 }
 }
 }
@@ -448,6 +449,15 @@ public class ImgurRipper extends AlbumRipper {
 this.url = new URL("http://imgur.com/a/" + gid);
 return gid;
 }
+p = Pattern.compile("^https?://(www\\.|m\\.)?imgur\\.com/(a|gallery|t)/[a-zA-Z0-9]*/([a-zA-Z0-9]{5,}).*$");
+m = p.matcher(url.toExternalForm());
+if (m.matches()) {
+// Imgur album or gallery
+albumType = ALBUM_TYPE.ALBUM;
+String gid = m.group(m.groupCount());
+this.url = new URL("http://imgur.com/a/" + gid);
+return gid;
+}
 p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{3,})\\.imgur\\.com/?$");
 m = p.matcher(url.toExternalForm());
 if (m.matches()) {
@@ -515,12 +525,12 @@ public class ImgurRipper extends AlbumRipper {
 }
 
 public static class ImgurImage {
-public String title = "",
-description = "",
-extension = "";
+String title = "";
+String description = "";
+String extension = "";
 public URL url = null;
 
-public ImgurImage(URL url) {
+ImgurImage(URL url) {
 this.url = url;
 String tempUrl = url.toExternalForm();
 this.extension = tempUrl.substring(tempUrl.lastIndexOf('.'));
@@ -528,7 +538,7 @@ public class ImgurRipper extends AlbumRipper {
 this.extension = this.extension.substring(0, this.extension.indexOf("?"));
 }
 }
-public ImgurImage(URL url, String title) {
+ImgurImage(URL url, String title) {
 this(url);
 this.title = title;
 }
@@ -536,7 +546,7 @@ public class ImgurRipper extends AlbumRipper {
 this(url, title);
 this.description = description;
 }
-public String getSaveAs() {
+String getSaveAs() {
 String saveAs = this.title;
 String u = url.toExternalForm();
 if (u.contains("?")) {
@@ -554,17 +564,17 @@ public class ImgurRipper extends AlbumRipper {
 }
 
 public static class ImgurAlbum {
-public String title = null;
+String title = null;
 public URL url = null;
-public List<ImgurImage> images = new ArrayList<ImgurImage>();
-public ImgurAlbum(URL url) {
+public List<ImgurImage> images = new ArrayList<>();
+ImgurAlbum(URL url) {
 this.url = url;
 }
 public ImgurAlbum(URL url, String title) {
 this(url);
 this.title = title;
 }
-public void addImage(ImgurImage image) {
+void addImage(ImgurImage image) {
 images.add(image);
 }
 }
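The new branch added to ImgurRipper.getGID() accepts /a/, /gallery/, and /t/ URLs that carry an extra path segment before the album ID, and m.group(m.groupCount()) resolves to the third group. Exercising the committed pattern against a hypothetical tag-gallery URL:

```java
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class ImgurGidCheck {
    public static void main(String[] args) {
        Pattern p = Pattern.compile("^https?://(www\\.|m\\.)?imgur\\.com/(a|gallery|t)/[a-zA-Z0-9]*/([a-zA-Z0-9]{5,}).*$");
        Matcher m = p.matcher("https://imgur.com/t/funny/AbCdE12");
        if (m.matches()) {
            // Ripped as http://imgur.com/a/AbCdE12
            System.out.println(m.group(3)); // prints: AbCdE12
        }
    }
}
```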
src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
@@ -3,6 +3,8 @@ package com.rarchives.ripme.ripper.rippers;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.time.*;
+import java.time.format.DateTimeFormatter;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.regex.Matcher;
@@ -12,10 +14,16 @@ import org.json.JSONArray;
 import org.json.JSONException;
 import org.json.JSONObject;
 
-import com.rarchives.ripme.ripper.AbstractJSONRipper;
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
 import com.rarchives.ripme.utils.Http;
 
-public class InstagramRipper extends AbstractJSONRipper {
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import com.rarchives.ripme.ui.RipStatusMessage;
+import com.rarchives.ripme.utils.Utils;
+
+
+public class InstagramRipper extends AbstractHTMLRipper {
 
 private String userID;
 
@@ -37,131 +45,248 @@ public class InstagramRipper extends AbstractHTMLRipper {
 return (url.getHost().endsWith("instagram.com"));
 }
 
+@Override
+public URL sanitizeURL(URL url) throws MalformedURLException {
+URL san_url = new URL(url.toExternalForm().replaceAll("\\?hl=\\S*", ""));
+logger.info("sanitized URL is " + san_url.toExternalForm());
+return san_url;
+}
+
+private List<String> getPostsFromSinglePage(Document Doc) {
+List<String> imageURLs = new ArrayList<>();
+JSONArray datas;
+try {
+JSONObject json = getJSONFromPage(Doc);
+if (json.getJSONObject("entry_data").getJSONArray("PostPage")
+.getJSONObject(0).getJSONObject("graphql").getJSONObject("shortcode_media")
+.has("edge_sidecar_to_children")) {
+datas = json.getJSONObject("entry_data").getJSONArray("PostPage")
+.getJSONObject(0).getJSONObject("graphql").getJSONObject("shortcode_media")
+.getJSONObject("edge_sidecar_to_children").getJSONArray("edges");
+for (int i = 0; i < datas.length(); i++) {
+JSONObject data = (JSONObject) datas.get(i);
+data = data.getJSONObject("node");
+if (data.has("is_video") && data.getBoolean("is_video")) {
+imageURLs.add(data.getString("video_url"));
+} else {
+imageURLs.add(data.getString("display_url"));
+}
+}
+} else {
+JSONObject data = json.getJSONObject("entry_data").getJSONArray("PostPage")
+.getJSONObject(0).getJSONObject("graphql").getJSONObject("shortcode_media");
+if (data.getBoolean("is_video")) {
+imageURLs.add(data.getString("video_url"));
+} else {
+imageURLs.add(data.getString("display_url"));
+}
+}
+return imageURLs;
+} catch (IOException e) {
+logger.error("Unable to get JSON from page " + url.toExternalForm());
+return null;
+}
+}
 
 @Override
 public String getGID(URL url) throws MalformedURLException {
-Pattern p = Pattern.compile("^https?://instagram.com/([^/]+)");
+Pattern p = Pattern.compile("^https?://instagram.com/([^/]+)/?");
 Matcher m = p.matcher(url.toExternalForm());
 if (m.matches()) {
 return m.group(1);
 }
 
+p = Pattern.compile("^https?://www.instagram.com/([^/]+)/?(?:\\?hl=\\S*)?/?");
+m = p.matcher(url.toExternalForm());
+if (m.matches()) {
+return m.group(1);
+}
+
+p = Pattern.compile("^https?://www.instagram.com/p/([a-zA-Z0-9_-]+)/\\?taken-by=([^/]+)/?");
+m = p.matcher(url.toExternalForm());
+if (m.matches()) {
+return m.group(2) + "_" + m.group(1);
+}
+
+p = Pattern.compile("^https?://www.instagram.com/p/([a-zA-Z0-9_-]+)/?");
+m = p.matcher(url.toExternalForm());
+if (m.matches()) {
+return m.group(1);
+}
+
+p = Pattern.compile("^https?://www.instagram.com/p/([a-zA-Z0-9_-]+)/?(?:\\?hl=\\S*)?/?");
+m = p.matcher(url.toExternalForm());
+if (m.matches()) {
+return m.group(1);
+}
+
+p = Pattern.compile("^https?://www.instagram.com/explore/tags/([^/]+)/?");
+m = p.matcher(url.toExternalForm());
+if (m.matches()) {
+return m.group(1);
+}
+
 throw new MalformedURLException("Unable to find user in " + url);
 }
 
-@Override
-public URL sanitizeURL(URL url) throws MalformedURLException {
-Pattern p = Pattern.compile("^.*instagram\\.com/([a-zA-Z0-9\\-_.]+).*$");
-Matcher m = p.matcher(url.toExternalForm());
-if (m.matches()) {
-return new URL("http://instagram.com/" + m.group(1));
-}
-
-throw new MalformedURLException("Expected username in URL (instagram.com/username and not " + url);
-}
-
-private String getUserID(URL url) throws IOException {
-
-Pattern p = Pattern.compile("^https?://instagram\\.com/([^/]+)");
-Matcher m = p.matcher(url.toExternalForm());
-if (m.matches()) {
-return m.group(1);
-}
-
-throw new IOException("Unable to find userID at " + this.url);
-}
-
-@Override
-public JSONObject getFirstPage() throws IOException {
-userID = getUserID(url);
-
-String baseURL = "http://instagram.com/" + userID + "/media";
+private JSONObject getJSONFromPage(Document firstPage) throws IOException {
+String jsonText = "";
 try {
-JSONObject result = Http.url(baseURL).getJSON();
-return result;
+for (Element script : firstPage.select("script[type=text/javascript]")) {
+if (script.data().contains("window._sharedData = ")) {
+jsonText = script.data().replaceAll("window._sharedData = ", "");
+jsonText = jsonText.replaceAll("};", "}");
+}
+}
+return new JSONObject(jsonText);
 } catch (JSONException e) {
-throw new IOException("Could not get instagram user via: " + baseURL);
+throw new IOException("Could not get JSON from page");
 }
 }
 
 @Override
-public JSONObject getNextPage(JSONObject json) throws IOException {
+public Document getFirstPage() throws IOException {
+userID = getGID(url);
+return Http.url(url).get();
+}
 
-boolean nextPageAvailable;
+private String getVideoFromPage(String videoID) {
 try {
-nextPageAvailable = json.getBoolean("more_available");
-} catch (Exception e) {
-throw new IOException("No additional pages found");
-}
+Document doc = Http.url("https://www.instagram.com/p/" + videoID).get();
+return doc.select("meta[property=og:video]").attr("content");
+} catch (IOException e) {
+logger.warn("Unable to get page " + "https://www.instagram.com/p/" + videoID);
 
-if (nextPageAvailable) {
-JSONArray items = json.getJSONArray("items");
-JSONObject last_item = items.getJSONObject(items.length() - 1);
-String nextMaxID = last_item.getString("id");
-
-String baseURL = "http://instagram.com/" + userID + "/media/?max_id=" + nextMaxID;
-logger.info("Loading " + baseURL);
-sleep(1000);
-
-JSONObject nextJSON = Http.url(baseURL).getJSON();
-
-return nextJSON;
-} else {
-throw new IOException("No more images found");
 }
+return "";
 }
 
 private String getOriginalUrl(String imageURL) {
+// Without this regex most images will return a 403 error
+imageURL = imageURL.replaceAll("vp/[a-zA-Z0-9]*/", "");
 imageURL = imageURL.replaceAll("scontent.cdninstagram.com/hphotos-", "igcdn-photos-d-a.akamaihd.net/hphotos-ak-");
+// TODO replace this with a single regex
+imageURL = imageURL.replaceAll("p150x150/", "");
+imageURL = imageURL.replaceAll("p320x320/", "");
+imageURL = imageURL.replaceAll("p480x480/", "");
+imageURL = imageURL.replaceAll("p640x640/", "");
+imageURL = imageURL.replaceAll("p720x720/", "");
+imageURL = imageURL.replaceAll("p1080x1080/", "");
+imageURL = imageURL.replaceAll("p2048x2048/", "");
+imageURL = imageURL.replaceAll("s150x150/", "");
+imageURL = imageURL.replaceAll("s320x320/", "");
+imageURL = imageURL.replaceAll("s480x480/", "");
 imageURL = imageURL.replaceAll("s640x640/", "");
+imageURL = imageURL.replaceAll("s720x720/", "");
+imageURL = imageURL.replaceAll("s1080x1080/", "");
+imageURL = imageURL.replaceAll("s2048x2048/", "");
 
 
 // Instagram returns cropped images to unauthenticated applications to maintain legacy support.
 // To retrieve the uncropped image, remove this segment from the URL.
 // Segment format: cX.Y.W.H - eg: c0.134.1080.1080
-imageURL = imageURL.replaceAll("\\/c\\d{1,4}\\.\\d{1,4}\\.\\d{1,4}\\.\\d{1,4}", "");
+imageURL = imageURL.replaceAll("/c\\d{1,4}\\.\\d{1,4}\\.\\d{1,4}\\.\\d{1,4}", "");
 
 imageURL = imageURL.replaceAll("\\?ig_cache_key.+$", "");
 return imageURL;
 }
 
-private String getMedia(JSONObject data) {
-String imageURL = "";
-if (data.has("videos")) {
-imageURL = data.getJSONObject("videos").getJSONObject("standard_resolution").getString("url");
-} else if (data.has("images")) {
-imageURL = data.getJSONObject("images").getJSONObject("standard_resolution").getString("url");
-}
-return imageURL;
-}
-
 @Override
-public List<String> getURLsFromJSON(JSONObject json) {
-List<String> imageURLs = new ArrayList<String>();
-JSONArray datas = json.getJSONArray("items");
-for (int i = 0; i < datas.length(); i++) {
-JSONObject data = (JSONObject) datas.get(i);
+public List<String> getURLsFromPage(Document doc) {
+String nextPageID = "";
+List<String> imageURLs = new ArrayList<>();
+JSONObject json = new JSONObject();
+try {
+json = getJSONFromPage(doc);
+} catch (IOException e) {
+logger.warn("Unable to exact json from page");
+}
 
-String dataType = data.getString("type");
-if (dataType.equals("carousel")) {
-JSONArray carouselMedias = data.getJSONArray("carousel_media");
-for (int carouselIndex = 0; carouselIndex < carouselMedias.length(); carouselIndex++) {
-JSONObject carouselMedia = (JSONObject) carouselMedias.get(carouselIndex);
-String imageURL = getMedia(carouselMedia);
-if (!imageURL.equals("")) {
-imageURL = getOriginalUrl(imageURL);
-imageURLs.add(imageURL);
+if (!url.toExternalForm().contains("/p/")) {
+JSONArray datas = new JSONArray();
+try {
+JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage");
+datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes");
+} catch (JSONException e) {
+// Handle hashtag pages
+datas = json.getJSONObject("entry_data").getJSONArray("TagPage").getJSONObject(0)
+.getJSONObject("tag").getJSONObject("media").getJSONArray("nodes");
+}
+for (int i = 0; i < datas.length(); i++) {
+JSONObject data = (JSONObject) datas.get(i);
+Long epoch = data.getLong("date");
+Instant instant = Instant.ofEpochSecond(epoch);
+String image_date = DateTimeFormatter.ofPattern("yyyy_MM_dd_hh:mm_").format(ZonedDateTime.ofInstant(instant, ZoneOffset.UTC));
+if (data.getString("__typename").equals("GraphSidecar")) {
+try {
+Document slideShowDoc = Http.url(new URL ("https://www.instagram.com/p/" + data.getString("code"))).get();
+List<String> toAdd = getPostsFromSinglePage(slideShowDoc);
+for (int slideShowInt=0; slideShowInt<toAdd.size(); slideShowInt++) {
+addURLToDownload(new URL(toAdd.get(slideShowInt)), image_date + data.getString("code"));
+}
+} catch (MalformedURLException e) {
+logger.error("Unable to download slide show, URL was malformed");
+} catch (IOException e) {
+logger.error("Unable to download slide show");
 }
 }
-} else {
-String imageURL = getMedia(data);
-if (!imageURL.equals("")) {
+try {
+if (!data.getBoolean("is_video")) {
if (imageURLs.size() == 0) {
|
||||||
imageURL = getOriginalUrl(imageURL);
|
// We add this one item to the array because either wise
|
||||||
imageURLs.add(imageURL);
|
// the ripper will error out because we returned an empty array
|
||||||
|
imageURLs.add(getOriginalUrl(data.getString("thumbnail_src")));
|
||||||
|
}
|
||||||
|
addURLToDownload(new URL(getOriginalUrl(data.getString("thumbnail_src"))), image_date);
|
||||||
|
} else {
|
||||||
|
if (!Utils.getConfigBoolean("instagram.download_images_only", false)) {
|
||||||
|
addURLToDownload(new URL(getVideoFromPage(data.getString("code"))), image_date);
|
||||||
|
} else {
|
||||||
|
sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping video " + data.getString("code"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (MalformedURLException e) {
|
||||||
|
return imageURLs;
|
||||||
|
}
|
||||||
|
|
||||||
|
nextPageID = data.getString("id");
|
||||||
|
|
||||||
|
if (isThisATest()) {
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Rip the next page
|
||||||
|
if (!nextPageID.equals("") && !isThisATest()) {
|
||||||
|
if (url.toExternalForm().contains("/tags/")) {
|
||||||
|
try {
|
||||||
|
// Sleep for a while to avoid a ban
|
||||||
|
sleep(2500);
|
||||||
|
if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) {
|
||||||
|
getURLsFromPage(Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get());
|
||||||
|
} else {
|
||||||
|
getURLsFromPage(Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get());
|
||||||
|
}
|
||||||
|
|
||||||
if (isThisATest()) {
|
} catch (IOException e) {
|
||||||
break;
|
return imageURLs;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
// Sleep for a while to avoid a ban
|
||||||
|
sleep(2500);
|
||||||
|
getURLsFromPage(Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get());
|
||||||
|
} catch (IOException e) {
|
||||||
|
return imageURLs;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
logger.warn("Can't get net page");
|
||||||
}
|
}
|
||||||
|
} else { // We're ripping from a single page
|
||||||
|
logger.info("Ripping from single page");
|
||||||
|
imageURLs = getPostsFromSinglePage(doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
return imageURLs;
|
return imageURLs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
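Note on the InstagramRipper hunk above: the `// TODO replace this with a single regex` comment can be resolved, since all fourteen size segments share one shape, `p{W}x{H}/` or `s{W}x{H}/` with three- or four-digit dimensions. A minimal sketch of that consolidation (not part of this commit; the anchored pattern is an assumption about Instagram's URL layout):

    // Hypothetical one-pass replacement for the p150x150/ ... s2048x2048/ chain above
    imageURL = imageURL.replaceAll("[ps]\\d{3,4}x\\d{3,4}/", "");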
src/main/java/com/rarchives/ripme/ripper/rippers/JagodibujaRipper.java

@@ -4,14 +4,12 @@ import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
 
 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
 import com.rarchives.ripme.utils.Http;
@@ -50,7 +48,7 @@ public class JagodibujaRipper extends AbstractHTMLRipper {
 
     @Override
     public List<String> getURLsFromPage(Document doc) {
-        List<String> result = new ArrayList<String>();
+        List<String> result = new ArrayList<>();
         for (Element comicPageUrl : doc.select("div.gallery-icon > a")) {
             try {
                 sleep(500);
src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java

@@ -1,24 +1,15 @@
 package com.rarchives.ripme.ripper.rippers;
 
-import java.io.File;
-import java.io.FileOutputStream;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import com.rarchives.ripme.utils.Utils;
-import org.jsoup.Connection.Method;
-import org.jsoup.Connection.Response;
-import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
-import org.jsoup.safety.Whitelist;
 import org.jsoup.select.Elements;
 
 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
@@ -51,7 +42,7 @@ public class LusciousRipper extends AbstractHTMLRipper {
 
     @Override
     public List<String> getURLsFromPage(Document page) {
-        List<String> urls = new ArrayList<String>();
+        List<String> urls = new ArrayList<>();
         Elements urlElements = page.select("img#single_picture");
         for (Element e : urlElements) {
             urls.add(e.attr("src"));
src/main/java/com/rarchives/ripme/ripper/rippers/MediacrushRipper.java (deleted)

@@ -1,166 +0,0 @@
-package com.rarchives.ripme.ripper.rippers;
-
-import java.awt.Desktop;
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URISyntaxException;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import javax.net.ssl.SSLException;
-import javax.swing.JOptionPane;
-
-import org.json.JSONArray;
-import org.json.JSONObject;
-
-import com.rarchives.ripme.ripper.AbstractJSONRipper;
-import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
-import com.rarchives.ripme.utils.Http;
-
-public class MediacrushRipper extends AbstractJSONRipper {
-
-    /** Ordered list of preferred formats, sorted by preference (low-to-high) */
-    private static final Map<String, Integer> PREFERRED_FORMATS = new HashMap<String,Integer>();
-    static {
-        PREFERRED_FORMATS.put("mp4", 0);
-        PREFERRED_FORMATS.put("wemb",1);
-        PREFERRED_FORMATS.put("ogv", 2);
-        PREFERRED_FORMATS.put("mp3", 3);
-        PREFERRED_FORMATS.put("ogg", 4);
-        PREFERRED_FORMATS.put("gif", 5);
-        PREFERRED_FORMATS.put("png", 6);
-        PREFERRED_FORMATS.put("jpg", 7);
-        PREFERRED_FORMATS.put("jpeg",8);
-    };
-
-    public MediacrushRipper(URL url) throws IOException {
-        super(url);
-    }
-
-    @Override
-    public String getHost() {
-        return "mediacrush";
-    }
-    @Override
-    public String getDomain() {
-        return "mediacru.sh";
-    }
-
-    @Override
-    public String getGID(URL url) throws MalformedURLException {
-        Pattern p = Pattern.compile("https?://[wm.]*mediacru\\.sh/([a-zA-Z0-9]+).*");
-        Matcher m = p.matcher(url.toExternalForm());
-        if (m.matches()) {
-            return m.group(1);
-        }
-        throw new MalformedURLException("Could not find mediacru.sh page ID from " + url
-                + " expected format: http://mediacru.sh/pageid");
-    }
-
-    @Override
-    public URL sanitizeURL(URL url) throws MalformedURLException {
-        String u = url.toExternalForm();
-        // Strip trailing "/" characters
-        while (u.endsWith("/")) {
-            u = u.substring(0, u.length() - 1);
-        }
-        // Append .json
-        if (!u.endsWith(".json")) {
-            u += ".json";
-        }
-        return new URL(u);
-    }
-
-    @Override
-    public JSONObject getFirstPage() throws IOException {
-        try {
-            String jsonString = Http.url(url)
-                                    .ignoreContentType()
-                                    .connection()
-                                    .execute().body();
-            jsonString = jsonString.replace("&quot;", "\"");
-            return new JSONObject(jsonString);
-        } catch (SSLException re) {
-            // Check for >1024 bit encryption but in older versions of Java
-            // It's the bug. Suggest downloading the latest version.
-            int selection = JOptionPane.showOptionDialog(null,
-                    "You need to upgrade to the latest Java (7+) to rip this album.\n"
-                            + "Do you want to open java.com and download the latest version?",
-                    "RipMe - Java Error",
-                    JOptionPane.OK_CANCEL_OPTION,
-                    JOptionPane.ERROR_MESSAGE,
-                    null,
-                    new String[] {"Go to java.com", "Cancel"},
-                    0);
-            sendUpdate(STATUS.RIP_ERRORED, "Your version of Java can't handle some secure websites");
-            if (selection == 0) {
-                URL javaUrl = new URL("https://www.java.com/en/download/");
-                try {
-                    Desktop.getDesktop().browse(javaUrl.toURI());
-                } catch (URISyntaxException use) { }
-            }
-            throw new IOException("Cannot rip due to limitations in Java installation, consider upgrading Java", re.getCause());
-        }
-        catch (Exception e) {
-            throw new IOException("Unexpected error: " + e.getMessage(), e);
-        }
-    }
-
-    @Override
-    public List<String> getURLsFromJSON(JSONObject json) {
-        List<String> imageURLs = new ArrayList<String>();
-        // Iterate over all files
-        JSONArray files = json.getJSONArray("files");
-        for (int i = 0; i < files.length(); i++) {
-            JSONObject file = (JSONObject) files.get(i);
-            // Find preferred file format
-            JSONArray subfiles = file.getJSONArray("files");
-            String preferredUrl = getPreferredUrl(subfiles);
-            if (preferredUrl == null) {
-                logger.warn("Could not find 'file' inside of " + file);
-                sendUpdate(STATUS.DOWNLOAD_ERRORED, "Could not find file inside of " + file);
-                continue;
-            }
-
-            imageURLs.add(preferredUrl);
-        }
-        return imageURLs;
-    }
-
-    @Override
-    public void downloadURL(URL url, int index) {
-        addURLToDownload(url, getPrefix(index));
-    }
-
-    /**
-     * Iterates over list if "file" objects and returns the preferred
-     * image format.
-     * @param subfiles Array of "files" (JSONObjects) which contain
-     * @return Preferred media format.
-     */
-    private String getPreferredUrl(JSONArray subfiles) {
-        String preferredUrl = null;
-        int preferredIndex = Integer.MAX_VALUE;
-        // Iterate over all media types
-        for (int j = 0; j < subfiles.length(); j++) {
-            JSONObject subfile = subfiles.getJSONObject(j);
-            String thisurl = subfile.getString("url");
-            String extension = thisurl.substring(thisurl.lastIndexOf(".") + 1);
-            if (!PREFERRED_FORMATS.containsKey(extension)) {
-                continue;
-            }
-            // Keep track of the most-preferred format
-            int thisindex = PREFERRED_FORMATS.get(extension);
-            if (preferredUrl == null || thisindex < preferredIndex) {
-                preferredIndex = thisindex;
-                preferredUrl = thisurl;
-            }
-        }
-        return preferredUrl;
-    }
-}
src/main/java/com/rarchives/ripme/ripper/rippers/ModelmayhemRipper.java

@@ -3,134 +3,64 @@ package com.rarchives.ripme.ripper.rippers;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
-import java.util.HashMap;
-import java.util.Map;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import org.json.JSONArray;
-import org.json.JSONObject;
-import org.jsoup.Connection.Method;
-import org.jsoup.Connection.Response;
-import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
 
-import com.rarchives.ripme.ripper.AlbumRipper;
-import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
-import com.rarchives.ripme.utils.Utils;
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
 
-public class ModelmayhemRipper extends AlbumRipper {
-
-    private static final String DOMAIN = "modelmayhem.com",
-                                HOST = "modelmayhem";
+public class ModelmayhemRipper extends AbstractHTMLRipper {
 
     public ModelmayhemRipper(URL url) throws IOException {
         super(url);
     }
 
-    @Override
-    public boolean canRip(URL url) {
-        return (url.getHost().endsWith(DOMAIN));
-    }
-
-    @Override
-    public URL sanitizeURL(URL url) throws MalformedURLException {
-        return url;
-    }
-
-    @Override
-    public void rip() throws IOException {
-        Map<String,String> cookies = null,
-                           postData = new HashMap<String,String>();
-        String gid = getGID(this.url),
-               ref = "http://www.modelmayhem.com/" + gid;
-
-        Response resp = null;
-        String theurl = "http://www.modelmayhem.com/" + gid;
-        logger.info("Loading " + theurl);
-        resp = Jsoup.connect(theurl)
-                .timeout(5000)
-                .referrer("")
-                .userAgent(USER_AGENT)
-                .method(Method.GET)
-                .execute();
-        cookies = resp.cookies();
-
-        resp = Jsoup.connect("http://www.modelmayhem.com/includes/js/auth.php")
-                .cookies(cookies)
-                .ignoreContentType(true)
-                .referrer(ref)
-                .userAgent(USER_AGENT)
-                .method(Method.GET)
-                .execute();
-        String authText = resp.parse().html();
-        String mmservice = authText.substring(authText.indexOf("token = '") + 9);
-        mmservice = mmservice.substring(0, mmservice.indexOf("'"));
-
-        cookies.putAll(resp.cookies());
-
-        cookies.put("worksafe", "0");
-        theurl = "http://www.modelmayhem.com/services/photo_viewer/albums/" + gid;
-        postData.put("MMSERVICE", mmservice);
-        resp = Jsoup.connect(theurl)
-                .data(postData)
-                .cookies(cookies)
-                .referrer(ref)
-                .userAgent(USER_AGENT)
-                .method(Method.POST)
-                .execute();
-        cookies.putAll(resp.cookies());
-
-        theurl = "http://www.modelmayhem.com/services/photo_viewer/pictures/" + gid + "/0/0/1/0";
-        this.sendUpdate(STATUS.LOADING_RESOURCE, theurl);
-        logger.info("Loading " + theurl);
-        resp = Jsoup.connect(theurl)
-                .data(postData)
-                .cookies(cookies)
-                .referrer(ref)
-                .userAgent(USER_AGENT)
-                .method(Method.POST)
-                .execute();
-
-        Document doc = resp.parse();
-        String jsonText = doc.body().html();
-        jsonText = jsonText.replace("&quot;", "\"");
-        System.err.println(jsonText);
-        JSONObject json = new JSONObject(jsonText);
-        JSONArray pictures = json.getJSONArray("pictures");
-        for (int i = 0; i < pictures.length(); i++) {
-            JSONObject picture = pictures.getJSONObject(i);
-            String bigImage = picture.getString("big_image");
-            if (bigImage.trim().equals("")) {
-                logger.info("Got empty image for " + picture.toString(2));
-                continue;
-            }
-            String prefix = "";
-            if (Utils.getConfigBoolean("download.save_order", true)) {
-                prefix = String.format("%03d_", i + 1);
-            }
-            addURLToDownload(new URL(bigImage), prefix);
-
-            if (isThisATest()) {
-                break;
-            }
-        }
-        waitForThreads();
-    }
-
     @Override
     public String getHost() {
-        return HOST;
+        return "modelmayhem";
+    }
+
+    @Override
+    public String getDomain() {
+        return "modelmayhem.com";
     }
 
     @Override
     public String getGID(URL url) throws MalformedURLException {
-        Pattern p = Pattern.compile("^https?://[w.]*modelmayhem.com.*/([0-9]+)/?.*$");
+        Pattern p = Pattern.compile("https?://www\\.modelmayhem\\.com/portfolio/(\\d+)/viewall");
         Matcher m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             return m.group(1);
         }
-        throw new MalformedURLException("Modelmayhem user ID not found in " + url + ", expected http://modelmayhem.com/userid");
+        throw new MalformedURLException("Expected modelmayhem URL format: " +
+                "modelmayhem.com/portfolio/ID/viewall - got " + url + " instead");
     }
 
+    @Override
+    public Document getFirstPage() throws IOException {
+        // "url" is an instance field of the superclass
+        return Http.url(url).get();
+    }
+
+    @Override
+    public List<String> getURLsFromPage(Document doc) {
+        List<String> result = new ArrayList<>();
+        for (Element el : doc.select("tr.a_pics > td > div > a")) {
+            String image_URL = el.select("img").attr("src").replaceAll("_m", "");
+            if (image_URL.contains("http")) {
+                result.add(image_URL);
+            }
+        }
+        return result;
+    }
+
+    @Override
+    public void downloadURL(URL url, int index) {
+        addURLToDownload(url, getPrefix(index));
+    }
 }
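One caveat in the new ModelmayhemRipper.getURLsFromPage above: `replaceAll("_m", "")` removes every `_m` in the thumbnail URL, not only the size suffix. If the suffix always sits directly before the file extension (an assumption about Modelmayhem's image naming, not verified in this commit), a lookahead would confine the replacement:

    // Sketch: only strip "_m" when it immediately precedes the extension
    String image_URL = el.select("img").attr("src").replaceAll("_m(?=\\.[a-zA-Z]{3,4}$)", "");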
src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java

@@ -126,7 +126,7 @@ public class MotherlessRipper extends AlbumRipper {
         private URL url;
         private int index;
 
-        public MotherlessImageThread(URL url, int index) {
+        MotherlessImageThread(URL url, int index) {
             super();
             this.url = url;
             this.index = index;
@@ -142,7 +142,7 @@ public class MotherlessRipper extends AlbumRipper {
             Document doc = Http.url(u)
                               .referrer(u)
                               .get();
-            Pattern p = Pattern.compile("^.*__fileurl = '([^']{1,})';.*$", Pattern.DOTALL);
+            Pattern p = Pattern.compile("^.*__fileurl = '([^']+)';.*$", Pattern.DOTALL);
             Matcher m = p.matcher(doc.outerHtml());
             if (m.matches()) {
                 String file = m.group(1);
src/main/java/com/rarchives/ripme/ripper/rippers/MyhentaicomicsRipper.java

@@ -11,11 +11,9 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
-import java.util.Arrays;
 
 public class MyhentaicomicsRipper extends AbstractHTMLRipper {
-    public static boolean isTag;
+    private static boolean isTag;
 
     public MyhentaicomicsRipper(URL url) throws IOException {
         super(url);
@@ -47,7 +45,7 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
             return ma.group(1);
         }
 
        Pattern pat = Pattern.compile("^https?://myhentaicomics.com/index.php/tag/([0-9]*)/?([a-zA-Z%0-9+\\?=:]*)?$");
-        Pattern pat = Pattern.compile("^https?://myhentaicomics.com/index.php/tag/([0-9]*)/?([a-zA-Z%0-9+\\?=:]*)?$");
+        Pattern pat = Pattern.compile("^https?://myhentaicomics.com/index.php/tag/([0-9]*)/?([a-zA-Z%0-9+?=:]*)?$");
         Matcher mat = pat.matcher(url.toExternalForm());
         if (mat.matches()) {
             isTag = true;
@@ -84,8 +82,8 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
     }
 
     // This replaces getNextPage when downloading from searches and tags
-    public List<String> getNextAlbumPage(String pageUrl) {
-        List<String> albumPagesList = new ArrayList<String>();
+    private List<String> getNextAlbumPage(String pageUrl) {
+        List<String> albumPagesList = new ArrayList<>();
         int pageNumber = 1;
         albumPagesList.add("http://myhentaicomics.com/index.php/" + pageUrl.split("\\?")[0] + "?page=" + Integer.toString(pageNumber));
         while (true) {
@@ -115,9 +113,9 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
         return albumPagesList;
     }
 
-    public List<String> getAlbumsFromPage(String url) {
+    private List<String> getAlbumsFromPage(String url) {
         List<String> pagesToRip;
-        List<String> result = new ArrayList<String>();
+        List<String> result = new ArrayList<>();
         logger.info("Running getAlbumsFromPage");
         Document doc;
         try {
@@ -161,7 +159,7 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
                 url_string = url_string.replace("%28", "_");
                 url_string = url_string.replace("%29", "_");
                 url_string = url_string.replace("%2C", "_");
-                if (isTag == true) {
+                if (isTag) {
                     logger.info("Downloading from a tag or search");
                     try {
                         sleep(500);
@@ -180,11 +178,11 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
         return result;
     }
 
-    public List<String> getListOfPages(Document doc) {
-        List<String> pages = new ArrayList<String>();
+    private List<String> getListOfPages(Document doc) {
+        List<String> pages = new ArrayList<>();
         // Get the link from the last button
         String nextPageUrl = doc.select("a.ui-icon-right").last().attr("href");
-        Pattern pat = Pattern.compile("\\/index\\.php\\/tag\\/[0-9]*\\/[a-zA-Z0-9_\\-\\:+]*\\?page=(\\d+)");
+        Pattern pat = Pattern.compile("/index\\.php/tag/[0-9]*/[a-zA-Z0-9_\\-:+]*\\?page=(\\d+)");
         Matcher mat = pat.matcher(nextPageUrl);
         if (mat.matches()) {
             logger.debug("Getting pages from a tag");
@@ -197,7 +195,7 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
                 pages.add(link);
             }
         } else {
-            Pattern pa = Pattern.compile("\\/index\\.php\\/search\\?q=[a-zA-Z0-9_\\-\\:]*\\&page=(\\d+)");
+            Pattern pa = Pattern.compile("/index\\.php/search\\?q=[a-zA-Z0-9_\\-:]*&page=(\\d+)");
             Matcher ma = pa.matcher(nextPageUrl);
             if (ma.matches()) {
                 logger.debug("Getting pages from a search");
@@ -217,7 +215,7 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
 
     @Override
     public List<String> getURLsFromPage(Document doc) {
-        List<String> result = new ArrayList<String>();
+        List<String> result = new ArrayList<>();
         // Checks if this is a comic page or a page of albums
         // If true the page is a page of albums
         if (doc.toString().contains("class=\"g-item g-album\"")) {
@@ -241,7 +239,7 @@ public class MyhentaicomicsRipper extends AbstractHTMLRipper {
         for (Element el : doc.select("img")) {
             String imageSource = el.attr("src");
             // This bool is here so we don't try and download the site logo
-            if (!imageSource.startsWith("http://")) {
+            if (!imageSource.startsWith("http://") && !imageSource.startsWith("https://")) {
                 // We replace thumbs with resizes so we can get the full sized images
                 imageSource = imageSource.replace("thumbs", "resizes");
                 result.add("http://myhentaicomics.com/" + imageSource);
src/main/java/com/rarchives/ripme/ripper/rippers/NatalieMuRipper.java

@@ -86,7 +86,7 @@ public class NatalieMuRipper extends AbstractHTMLRipper {
 
     @Override
     public List<String> getURLsFromPage(Document page) {
-        List<String> imageURLs = new ArrayList<String>();
+        List<String> imageURLs = new ArrayList<>();
         Pattern p; Matcher m;
         //select all album thumbnails
         for (Element span : page.select(".NA_articleGallery span")) {
src/main/java/com/rarchives/ripme/ripper/rippers/NfsfwRipper.java

@@ -75,7 +75,7 @@ public class NfsfwRipper extends AlbumRipper {
 
     @Override
     public void rip() throws IOException {
-        List<Pair> subAlbums = new ArrayList<Pair>();
+        List<Pair> subAlbums = new ArrayList<>();
         int index = 0;
         subAlbums.add(new Pair(this.url.toExternalForm(), ""));
         while (subAlbums.size() > 0) {
@@ -153,7 +153,7 @@ public class NfsfwRipper extends AlbumRipper {
         private String subdir;
         private int index;
 
-        public NfsfwImageThread(URL url, String subdir, int index) {
+        NfsfwImageThread(URL url, String subdir, int index) {
             super();
             this.url = url;
             this.subdir = subdir;
@@ -187,8 +187,9 @@ public class NfsfwRipper extends AlbumRipper {
     }
 
     private class Pair {
-        public String first, second;
-        public Pair(String first, String second) {
+        String first;
+        String second;
+        Pair(String first, String second) {
             this.first = first;
             this.second = second;
         }
src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java

@@ -61,7 +61,7 @@ public class NhentaiRipper extends AbstractHTMLRipper {
         if (title == null) {
             return getAlbumTitle(url);
         }
-        return title;
+        return "nhentai" + title;
     }
 
     @Override
@@ -87,7 +87,7 @@ public class NhentaiRipper extends AbstractHTMLRipper {
 
     @Override
     public List<String> getURLsFromPage(Document page) {
-        List<String> imageURLs = new ArrayList<String>();
+        List<String> imageURLs = new ArrayList<>();
         Elements thumbs = page.select(".gallerythumb");
         for (Element el : thumbs) {
             String imageUrl = el.attr("href");
src/main/java/com/rarchives/ripme/ripper/rippers/NudeGalsRipper.java

@@ -55,7 +55,7 @@ public class NudeGalsRipper extends AbstractHTMLRipper {
         Pattern p;
         Matcher m;
 
-        p = Pattern.compile("^.*nude-gals\\.com\\/photoshoot\\.php\\?photoshoot_id=(\\d+)$");
+        p = Pattern.compile("^.*nude-gals\\.com/photoshoot\\.php\\?photoshoot_id=(\\d+)$");
         m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             return m.group(1);
@@ -77,7 +77,7 @@ public class NudeGalsRipper extends AbstractHTMLRipper {
 
     @Override
     public List<String> getURLsFromPage(Document doc) {
-        List<String> imageURLs = new ArrayList<String>();
+        List<String> imageURLs = new ArrayList<>();
 
         Elements thumbs = doc.select("#grid_container .grid > .grid_box");
         for (Element thumb : thumbs) {
src/main/java/com/rarchives/ripme/ripper/rippers/OglafRipper.java (new file)

@@ -0,0 +1,87 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+
+public class OglafRipper extends AbstractHTMLRipper {
+
+    public OglafRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return "oglaf";
+    }
+
+    @Override
+    public String getDomain() {
+        return "oglaf.com";
+    }
+
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        Pattern p = Pattern.compile("http://oglaf\\.com/([a-zA-Z1-9_-]*)/?");
+        Matcher m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return m.group(1);
+        }
+        throw new MalformedURLException("Expected oglaf URL format: " +
+                "oglaf.com/NAME - got " + url + " instead");
+    }
+
+    @Override
+    public String getAlbumTitle(URL url) throws MalformedURLException {
+        return getDomain();
+    }
+
+    @Override
+    public Document getFirstPage() throws IOException {
+        // "url" is an instance field of the superclass
+        return Http.url(url).get();
+    }
+
+    @Override
+    public Document getNextPage(Document doc) throws IOException {
+        if (doc.select("div#nav > a > div#nx").first() == null) {
+            throw new IOException("No more pages");
+        }
+        Element elem = doc.select("div#nav > a > div#nx").first().parent();
+        String nextPage = elem.attr("href");
+        // Sometimes this returns an empty string
+        // This check stops that
+        if (nextPage.equals("")) {
+            throw new IOException("No more pages");
+        }
+        else {
+            sleep(1000);
+            return Http.url("http://oglaf.com" + nextPage).get();
+        }
+    }
+
+    @Override
+    public List<String> getURLsFromPage(Document doc) {
+        List<String> result = new ArrayList<>();
+        for (Element el : doc.select("b > img#strip")) {
+            String imageSource = el.select("img").attr("src");
+            result.add(imageSource);
+        }
+        return result;
+    }
+
+    @Override
+    public void downloadURL(URL url, int index) {
+        addURLToDownload(url, getPrefix(index));
+    }
+}
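A note on OglafRipper.getNextPage above: the selector matches the inner `div#nx`, so the code climbs to `.parent()` because the `href` lives on the enclosing `<a>`. The same idea in a self-contained jsoup sketch (the sample markup is illustrative, not copied from oglaf.com):

    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;

    public class NextLinkDemo {
        public static void main(String[] args) {
            Document doc = Jsoup.parse(
                    "<div id=\"nav\"><a href=\"/epilogue/\"><div id=\"nx\"></div></a></div>");
            Element nx = doc.select("div#nav > a > div#nx").first();
            // The href sits on the parent <a>, not on div#nx itself
            System.out.println(nx.parent().attr("href")); // prints /epilogue/
        }
    }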
src/main/java/com/rarchives/ripme/ripper/rippers/PahealRipper.java

@@ -13,21 +13,22 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.logging.Level;
-import java.util.logging.Logger;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import org.apache.log4j.Logger;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;
 
 public class PahealRipper extends AbstractHTMLRipper {
+    private static final Logger logger = Logger.getLogger(PahealRipper.class);
+
     private static Map<String, String> cookies = null;
     private static Pattern gidPattern = null;
 
     private static Map<String, String> getCookies() {
         if (cookies == null) {
-            cookies = new HashMap<String, String>(1);
+            cookies = new HashMap<>(1);
             cookies.put("ui-tnc-agreed", "true");
         }
         return cookies;
@@ -66,7 +67,7 @@ public class PahealRipper extends AbstractHTMLRipper {
     @Override
     public List<String> getURLsFromPage(Document page) {
         Elements elements = page.select(".shm-thumb.thumb>a").not(".shm-thumb-link");
-        List<String> res = new ArrayList<String>(elements.size());
+        List<String> res = new ArrayList<>(elements.size());
 
         for (Element e : elements) {
             res.add(e.absUrl("href"));
@@ -92,10 +93,8 @@ public class PahealRipper extends AbstractHTMLRipper {
                     + Utils.filesystemSafe(new URI(name).getPath())
                     + ext);
             addURLToDownload(url, outFile);
-        } catch (IOException ex) {
-            Logger.getLogger(PahealRipper.class.getName()).log(Level.SEVERE, null, ex);
-        } catch (URISyntaxException ex) {
-            Logger.getLogger(PahealRipper.class.getName()).log(Level.SEVERE, null, ex);
+        } catch (IOException | URISyntaxException ex) {
+            logger.error("Error while downloading URL " + url, ex);
         }
     }
 
@@ -117,7 +116,7 @@ public class PahealRipper extends AbstractHTMLRipper {
         try {
             return Utils.filesystemSafe(new URI(getTerm(url)).getPath());
         } catch (URISyntaxException ex) {
-            Logger.getLogger(PahealRipper.class.getName()).log(Level.SEVERE, null, ex);
+            logger.error(ex);
         }
 
         throw new MalformedURLException("Expected paheal.net URL format: rule34.paheal.net/post/list/searchterm - got " + url + " instead");
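The PahealRipper change above folds two identical `java.util.logging` handlers into one Java 7 multi-catch routed through the class's new log4j logger. The pattern in isolation (`riskyDownload` is a stand-in for the download call, not a RipMe method):

    try {
        riskyDownload();
    } catch (IOException | URISyntaxException ex) {
        // One handler covers both types; ex is implicitly final in a multi-catch
        logger.error("Error while downloading URL " + url, ex);
    }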
src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java

@@ -85,8 +85,8 @@ public class PhotobucketRipper extends AlbumRipper {
     public void rip() throws IOException {
         List<String> subalbums = ripAlbumAndGetSubalbums(this.url.toExternalForm());
 
-        List<String> subsToRip = new ArrayList<String>(),
-                     rippedSubs = new ArrayList<String>();
+        List<String> subsToRip = new ArrayList<>(),
+                     rippedSubs = new ArrayList<>();
 
         for (String sub : subalbums) {
             subsToRip.add(sub);
@@ -117,7 +117,7 @@ public class PhotobucketRipper extends AlbumRipper {
         waitForThreads();
     }
 
-    public List<String> ripAlbumAndGetSubalbums(String theUrl) throws IOException {
+    private List<String> ripAlbumAndGetSubalbums(String theUrl) throws IOException {
         int filesIndex = 0,
             filesTotal = 0,
             pageIndex = 0;
@@ -145,7 +145,7 @@ public class PhotobucketRipper extends AlbumRipper {
         }
         // Grab the JSON
         Pattern p; Matcher m;
-        p = Pattern.compile("^.*collectionData: (\\{.*\\}).*$", Pattern.DOTALL);
+        p = Pattern.compile("^.*collectionData: (\\{.*}).*$", Pattern.DOTALL);
         m = p.matcher(data);
         if (m.matches()) {
             jsonString = m.group(1);
@@ -176,12 +176,12 @@ public class PhotobucketRipper extends AlbumRipper {
         if (url != null) {
             return getSubAlbums(url, currentAlbumPath);
         } else {
-            return new ArrayList<String>();
+            return new ArrayList<>();
         }
     }
 
     private List<String> getSubAlbums(String url, String currentAlbumPath) {
-        List<String> result = new ArrayList<String>();
+        List<String> result = new ArrayList<>();
         String subdomain = url.substring(url.indexOf("://")+3);
         subdomain = subdomain.substring(0, subdomain.indexOf("."));
         String apiUrl = "http://" + subdomain + ".photobucket.com/component/Albums-SubalbumList"
src/main/java/com/rarchives/ripme/ripper/rippers/PichunterRipper.java (new file)

@@ -0,0 +1,107 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+
+public class PichunterRipper extends AbstractHTMLRipper {
+
+    public PichunterRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return "pichunter";
+    }
+
+    @Override
+    public String getDomain() {
+        return "pichunter.com";
+    }
+
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        Pattern p = Pattern.compile("https?://www.pichunter.com/(|tags|models|sites)/(\\S*)/?");
+        Matcher m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return m.group(2);
+        }
+        p = Pattern.compile("https?://www.pichunter.com/(tags|models|sites)/(\\S*)/photos/\\d+/?");
+        m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return m.group(2);
+        }
+        p = Pattern.compile("https?://www.pichunter.com/tags/all/(\\S*)/\\d+/?");
+        m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return m.group(1);
+        }
+
+        p = Pattern.compile("https?://www.pichunter.com/gallery/\\d+/(\\S*)/?");
+        m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return m.group(1);
+        }
+        throw new MalformedURLException("Expected pichunter URL format: " +
+                "pichunter.com/(tags|models|sites)/Name/ - got " + url + " instead");
+    }
+
+    private boolean isPhotoSet(URL url) {
+        Pattern p = Pattern.compile("https?://www.pichunter.com/gallery/\\d+/(\\S*)/?");
+        Matcher m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return true;
+        }
+        return false;
+    }
+
+    @Override
+    public Document getFirstPage() throws IOException {
+        // "url" is an instance field of the superclass
+        return Http.url(url).get();
+    }
+
+    @Override
+    public Document getNextPage(Document doc) throws IOException {
+        // We use li.arrow to find the next page
+        Element elem = doc.select("div.paperSpacings > ul > li.arrow").last();
+        if (elem != null) {
+            String nextPage = elem.select("a").attr("href");
+            // Note: this can sometimes return an empty string
+            return Http.url("http://www.pichunter.com" + nextPage).get();
+        }
+        throw new IOException("No more pages");
+    }
+
+    @Override
+    public List<String> getURLsFromPage(Document doc) {
+        List<String> result = new ArrayList<>();
+        if (!isPhotoSet(url)) {
+            for (Element el : doc.select("div.thumbtable > a.thumb > img")) {
+                result.add(el.attr("src").replaceAll("_i", "_o"));
+            }
+        } else {
+            for (Element el : doc.select("div.flex-images > figure > a.item > img")) {
+                result.add(el.attr("src").replaceAll("_i", "_o"));
+            }
+        }
+        return result;
+    }
+
+    @Override
+    public void downloadURL(URL url, int index) {
+        addURLToDownload(url, getPrefix(index));
+    }
+}
src/main/java/com/rarchives/ripme/ripper/rippers/PorncomixRipper.java (new file)

@@ -0,0 +1,67 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+
+public class PorncomixRipper extends AbstractHTMLRipper {
+
+    public PorncomixRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return "porncomix";
+    }
+
+    @Override
+    public String getDomain() {
+        return "porncomix.info";
+    }
+
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        Pattern p = Pattern.compile("https?://www.porncomix.info/([a-zA-Z0-9_\\-]*)/?$");
+        Matcher m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return m.group(1);
+        }
+        throw new MalformedURLException("Expected porncomix URL format: " +
+                "porncomix.info/comic - got " + url + " instead");
+    }
+
+    @Override
+    public Document getFirstPage() throws IOException {
+        // "url" is an instance field of the superclass
+        return Http.url(url).get();
+    }
+
+    @Override
+    public List<String> getURLsFromPage(Document doc) {
+        List<String> result = new ArrayList<>();
+        for (Element el : doc.select("div.single-post > div.gallery > dl > dt > a > img")) {
+            String imageSource = el.attr("data-lazy-src");
+            // We remove the size suffix from the image URL so we download the full size image,
+            // not the thumbnail
+            imageSource = imageSource.replaceAll("-\\d\\d\\dx\\d\\d\\d", "");
+            result.add(imageSource);
+        }
+        return result;
+    }
+
+    @Override
+    public void downloadURL(URL url, int index) {
+        addURLToDownload(url, getPrefix(index));
+    }
+}
src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java

@@ -19,7 +19,7 @@ import com.rarchives.ripme.utils.Utils;
 
 public class PornhubRipper extends AlbumRipper {
     // All sleep times are in milliseconds
-    private static final int IMAGE_SLEEP_TIME = 1 * 1000;
+    private static final int IMAGE_SLEEP_TIME = 1000;
 
     private static final String DOMAIN = "pornhub.com", HOST = "Pornhub";
 
@@ -134,7 +134,7 @@ public class PornhubRipper extends AlbumRipper {
         private URL url;
         private int index;
 
-        public PornhubImageThread(URL url, int index, File workingDir) {
+        PornhubImageThread(URL url, int index, File workingDir) {
             super();
             this.url = url;
             this.index = index;
src/main/java/com/rarchives/ripme/ripper/rippers/DatwinRipper.java renamed to src/main/java/com/rarchives/ripme/ripper/rippers/PornpicsRipper.java

@@ -1,5 +1,10 @@
 package com.rarchives.ripme.ripper.rippers;
 
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
@@ -8,54 +13,47 @@ import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-
-import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.utils.Http;
-
-public class DatwinRipper extends AbstractHTMLRipper {
-
-    public DatwinRipper(URL url) throws IOException {
+public class PornpicsRipper extends AbstractHTMLRipper {
+
+    public PornpicsRipper(URL url) throws IOException {
         super(url);
     }
 
     @Override
     public String getHost() {
-        return "datwin";
+        return "pornpics";
     }
 
     @Override
     public String getDomain() {
-        return "datw.in";
+        return "pornpics.com";
     }
 
     @Override
     public String getGID(URL url) throws MalformedURLException {
-        Pattern p = Pattern.compile("^.*datw.in/([a-zA-Z0-9\\-_]+).*$");
+        Pattern p = Pattern.compile("https?://www.pornpics.com/galleries/([a-zA-Z0-9_-]*)/?");
         Matcher m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             return m.group(1);
         }
-        throw new MalformedURLException(
-                "Expected datw.in gallery formats: "
-                        + "datw.in/..."
-                        + " Got: " + url);
+        throw new MalformedURLException("Expected pornpics URL format: " +
+                "www.pornpics.com/galleries/ID - got " + url + " instead");
     }
 
     @Override
     public Document getFirstPage() throws IOException {
+        // "url" is an instance field of the superclass
         return Http.url(url).get();
     }
 
     @Override
     public List<String> getURLsFromPage(Document doc) {
-        List<String> imageURLs = new ArrayList<String>();
-        for (Element thumb : doc.select("img.attachment-thumbnail")) {
-            String image = thumb.attr("src");
-            image = image.replaceAll("-\\d{1,3}x\\d{1,3}", "");
-            imageURLs.add(image);
-        }
-        return imageURLs;
+        List<String> result = new ArrayList<>();
+        for (Element el : doc.select("a.rel-link")) {
+            result.add(el.attr("href"));
+        }
+        return result;
     }
 
     @Override
src/main/java/com/rarchives/ripme/ripper/rippers/RajceRipper.java

@@ -64,7 +64,7 @@ public class RajceRipper extends AbstractHTMLRipper {
 
     @Override
     public List<String> getURLsFromPage(Document page) {
-        List<String> result = new ArrayList<String>();
+        List<String> result = new ArrayList<>();
         for (Element el : page.select("a.photoThumb")) {
             result.add(el.attr("href"));
         }
@@ -27,7 +27,7 @@ public class RedditRipper extends AlbumRipper {
     private static final String HOST = "reddit";
     private static final String DOMAIN = "reddit.com";
 
-    private static final String REDDIT_USER_AGENT = "RipMe:github/4pr0n/ripme:" + UpdateUtils.getThisJarVersion() + " (by /u/4_pr0n)";
+    private static final String REDDIT_USER_AGENT = "RipMe:github.com/RipMeApp/ripme:" + UpdateUtils.getThisJarVersion() + " (by /u/metaprime and /u/ineedmorealts)";
 
     private static final int SLEEP_TIME = 2000;
 
@@ -131,7 +131,7 @@ public class RedditRipper extends AlbumRipper {
         Object jsonObj = new JSONTokener(jsonString).nextValue();
         JSONArray jsonArray = new JSONArray();
         if (jsonObj instanceof JSONObject) {
-            jsonArray.put( (JSONObject) jsonObj);
+            jsonArray.put(jsonObj);
         } else if (jsonObj instanceof JSONArray) {
             jsonArray = (JSONArray) jsonObj;
         } else {
@@ -167,7 +167,7 @@ public class RedditRipper extends AlbumRipper {
             }
         }
 
-    public void handleBody(String body, String id) {
+    private void handleBody(String body, String id) {
         Pattern p = RipUtils.getURLRegex();
         Matcher m = p.matcher(body);
         while (m.find()) {
@@ -179,7 +179,7 @@ public class RedditRipper extends AlbumRipper {
         }
     }
 
-    public void handleURL(String theUrl, String id) {
+    private void handleURL(String theUrl, String id) {
         URL originalURL;
         try {
             originalURL = new URL(theUrl);
@@ -220,21 +220,21 @@ public class RedditRipper extends AlbumRipper {
     @Override
     public String getGID(URL url) throws MalformedURLException {
         // User
-        Pattern p = Pattern.compile("^https?://[a-zA-Z0-9\\.]{0,4}reddit\\.com/(user|u)/([a-zA-Z0-9_\\-]{3,}).*$");
+        Pattern p = Pattern.compile("^https?://[a-zA-Z0-9.]{0,4}reddit\\.com/(user|u)/([a-zA-Z0-9_\\-]{3,}).*$");
         Matcher m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             return "user_" + m.group(m.groupCount());
         }
 
         // Post
-        p = Pattern.compile("^https?://[a-zA-Z0-9\\.]{0,4}reddit\\.com/.*comments/([a-zA-Z0-9]{1,8}).*$");
+        p = Pattern.compile("^https?://[a-zA-Z0-9.]{0,4}reddit\\.com/.*comments/([a-zA-Z0-9]{1,8}).*$");
         m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             return "post_" + m.group(m.groupCount());
         }
 
         // Subreddit
-        p = Pattern.compile("^https?://[a-zA-Z0-9\\.]{0,4}reddit\\.com/r/([a-zA-Z0-9_]{1,}).*$");
+        p = Pattern.compile("^https?://[a-zA-Z0-9.]{0,4}reddit\\.com/r/([a-zA-Z0-9_]+).*$");
         m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             return "sub_" + m.group(m.groupCount());
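Note: the tightened reddit patterns above sort a URL into one of three GID namespaces (user_, post_, sub_), and the check order matters — a post URL also matches the subreddit pattern, so post must be tried first, as the ripper does. A minimal standalone sketch of that classification for eyeballing the regexes (the class and example URLs here are illustrative, not part of the commit):

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    class RedditGidCheck {
        public static void main(String[] args) {
            Pattern user = Pattern.compile("^https?://[a-zA-Z0-9.]{0,4}reddit\\.com/(user|u)/([a-zA-Z0-9_\\-]{3,}).*$");
            Pattern post = Pattern.compile("^https?://[a-zA-Z0-9.]{0,4}reddit\\.com/.*comments/([a-zA-Z0-9]{1,8}).*$");
            Pattern sub  = Pattern.compile("^https?://[a-zA-Z0-9.]{0,4}reddit\\.com/r/([a-zA-Z0-9_]+).*$");
            String[] urls = {
                "https://www.reddit.com/user/example_user/submitted/",
                "https://www.reddit.com/r/pics/comments/abc123/some_title/",
                "https://old.reddit.com/r/pics/"
            };
            for (String u : urls) {
                Matcher m;
                if ((m = user.matcher(u)).matches()) {
                    System.out.println("user_" + m.group(m.groupCount()));
                } else if ((m = post.matcher(u)).matches()) {  // before sub: post URLs contain /r/ too
                    System.out.println("post_" + m.group(m.groupCount()));
                } else if ((m = sub.matcher(u)).matches()) {
                    System.out.println("sub_" + m.group(m.groupCount()));
                }
            }
        }
    }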
@@ -21,7 +21,7 @@ import com.rarchives.ripme.utils.Http;
 
 public class SankakuComplexRipper extends AbstractHTMLRipper {
     private Document albumDoc = null;
-    private Map<String,String> cookies = new HashMap<String,String>();
+    private Map<String,String> cookies = new HashMap<>();
 
     public SankakuComplexRipper(URL url) throws IOException {
         super(url);
@@ -43,7 +43,7 @@ public class SankakuComplexRipper extends AbstractHTMLRipper {
         Matcher m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             try {
-                return URLDecoder.decode(m.group(1), "UTF-8");
+                return URLDecoder.decode(m.group(2), "UTF-8");
             } catch (UnsupportedEncodingException e) {
                 throw new MalformedURLException("Cannot decode tag name '" + m.group(1) + "'");
             }
@@ -65,34 +65,41 @@ public class SankakuComplexRipper extends AbstractHTMLRipper {
 
     @Override
     public List<String> getURLsFromPage(Document doc) {
-        List<String> imageURLs = new ArrayList<String>();
+        List<String> imageURLs = new ArrayList<>();
         // Image URLs are basically thumbnail URLs with a different domain, a simple
         // path replacement, and a ?xxxxxx post ID at the end (obtainable from the href)
-        for (Element thumbSpan : doc.select("div.content > div > span.thumb")) {
-            String postId = thumbSpan.attr("id").replaceAll("p", "");
-            Element thumb = thumbSpan.getElementsByTag("img").first();
-            String image = thumb.attr("abs:src")
-                    .replace(".sankakucomplex.com/data/preview",
-                             "s.sankakucomplex.com/data") + "?" + postId;
-            imageURLs.add(image);
+        for (Element thumbSpan : doc.select("div.content > div > span.thumb > a")) {
+            String postLink = thumbSpan.attr("href");
+            try {
+                // Get the page the full sized image is on
+                Document subPage = Http.url("https://chan.sankakucomplex.com" + postLink).get();
+                logger.info("Checking page " + "https://chan.sankakucomplex.com" + postLink);
+                imageURLs.add("https:" + subPage.select("div[id=stats] > ul > li > a[id=highres]").attr("href"));
+            } catch (IOException e) {
+                logger.warn("Error while loading page " + postLink, e);
+            }
         }
         return imageURLs;
     }
 
     @Override
     public void downloadURL(URL url, int index) {
-        // Mock up the URL of the post page based on the post ID at the end of the URL.
-        String postId = url.toExternalForm().replaceAll(".*\\?", "");
-        addURLToDownload(url, postId + "_", "", "", null);
+        sleep(8000);
+        addURLToDownload(url, getPrefix(index));
     }
 
     @Override
     public Document getNextPage(Document doc) throws IOException {
         Element pagination = doc.select("div.pagination").first();
         if (pagination.hasAttr("next-page-url")) {
-            return Http.url(pagination.attr("abs:next-page-url")).cookies(cookies).get();
-        } else {
-            return null;
+            String nextPage = pagination.attr("abs:next-page-url");
+            // Only logged in users can see past page 25
+            // Trying to rip page 26 will throw a no images found error
+            if (!nextPage.contains("page=26")) {
+                logger.info("Getting next page: " + pagination.attr("abs:next-page-url"));
+                return Http.url(pagination.attr("abs:next-page-url")).cookies(cookies).get();
+            }
         }
+        throw new IOException("No more pages");
     }
 }
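Note: getURLsFromPage above now spends one extra HTTP round-trip per thumbnail (thumb href → post page → #highres anchor) instead of deriving the full-size URL by string substitution, and downloadURL sleeps 8 seconds between files — a deliberate speed-for-reliability trade. A minimal jsoup sketch of the same resolution step in isolation (URL prefix and selector copied from the diff; the class and helper name are illustrative):

    import java.io.IOException;
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;

    class SankakuHighresResolver {
        // Resolve a post's full-size image URL from its post-page href,
        // using the same selector the ripper uses above.
        static String resolve(String postLink) throws IOException {
            Document post = Jsoup.connect("https://chan.sankakucomplex.com" + postLink).get();
            String href = post.select("div[id=stats] > ul > li > a[id=highres]").attr("href");
            return href.isEmpty() ? null : "https:" + href;  // hrefs are protocol-relative
        }
    }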
@@ -48,10 +48,10 @@ public class ShesFreakyRipper extends AbstractHTMLRipper {
 
     @Override
     public List<String> getURLsFromPage(Document doc) {
-        List<String> imageURLs = new ArrayList<String>();
+        List<String> imageURLs = new ArrayList<>();
         for (Element thumb : doc.select("a[data-lightbox=\"gallery\"]")) {
             String image = thumb.attr("href");
-            imageURLs.add(image);
+            imageURLs.add("https:" + image);
         }
         return imageURLs;
     }
@@ -2,24 +2,16 @@ package com.rarchives.ripme.ripper.rippers;
 
 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
 import com.rarchives.ripme.utils.Http;
-import com.rarchives.ripme.utils.Utils;
-import java.io.File;
 import java.io.IOException;
 import java.net.MalformedURLException;
-import java.net.URI;
-import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
-import java.util.logging.Level;
-import java.util.logging.Logger;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
 
 public class SinnercomicsRipper extends AbstractHTMLRipper {
 
@@ -71,7 +63,7 @@ public class SinnercomicsRipper extends AbstractHTMLRipper {
 
     @Override
     public List<String> getURLsFromPage(Document doc) {
-        List<String> result = new ArrayList<String>();
+        List<String> result = new ArrayList<>();
         for (Element el : doc.select("meta[property=og:image]")) {
             String imageSource = el.attr("content");
             imageSource = imageSource.replace(" alt=", "");
@@ -1,95 +0,0 @@
-package com.rarchives.ripme.ripper.rippers;
-
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.jsoup.HttpStatusException;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
-
-import com.rarchives.ripme.ripper.AlbumRipper;
-import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
-import com.rarchives.ripme.utils.Http;
-
-/**
- * Appears to be broken as of 2015-02-11.
- * Looks like supertangas changed their site completely.
- */
-public class SupertangasRipper extends AlbumRipper {
-
-    private static final String DOMAIN = "supertangas.com",
-                                HOST = "supertangas";
-
-    public SupertangasRipper(URL url) throws IOException {
-        super(url);
-    }
-
-    @Override
-    public boolean canRip(URL url) {
-        return url.getHost().endsWith(DOMAIN);
-    }
-
-    @Override
-    public URL sanitizeURL(URL url) throws MalformedURLException {
-        return url;
-    }
-
-    @Override
-    public void rip() throws IOException {
-        int page = 0;
-        String baseURL = "http://www.supertangas.com/fotos/?level=search&exact=1&searchterms=" + this.getGID(this.url);
-        Document doc;
-        while (true) {
-            page++;
-            String theURL = baseURL;
-            if (page > 1) {
-                theURL += "&plog_page=" + page;
-            }
-            try {
-                logger.info("    Retrieving " + theURL);
-                sendUpdate(STATUS.LOADING_RESOURCE, theURL);
-                doc = Http.url(theURL).get();
-            } catch (HttpStatusException e) {
-                logger.debug("Hit end of pages at page " + page, e);
-                break;
-            }
-            Elements images = doc.select("li.thumbnail a");
-            if (images.size() == 0) {
-                break;
-            }
-            for (Element imageElement : images) {
-                String image = imageElement.attr("href");
-                image = image.replaceAll("\\/fotos\\/", "/fotos/images/");
-                addURLToDownload(new URL(image));
-            }
-            try {
-                Thread.sleep(1000);
-            } catch (InterruptedException e) {
-                logger.error("[!] Interrupted while waiting to load next page", e);
-                break;
-            }
-        }
-        waitForThreads();
-    }
-
-    @Override
-    public String getHost() {
-        return HOST;
-    }
-
-    @Override
-    public String getGID(URL url) throws MalformedURLException {
-        // http://www.supertangas.com/fotos/?level=search&exact=1&searchterms=Tahiticora%20(France)
-        Pattern p = Pattern.compile("^https?://[w.]*supertangas\\.com/fotos/\\?.*&searchterms=([a-zA-Z0-9%()+]+).*$");
-        Matcher m = p.matcher(url.toExternalForm());
-        if (!m.matches()) {
-            throw new MalformedURLException("Expected format: http://supertangas.com/fotos/?level=search&exact=1&searchterms=...");
-        }
-        return m.group(m.groupCount());
-    }
-
-}
@@ -18,19 +18,19 @@ import com.rarchives.ripme.utils.Http;
 import com.rarchives.ripme.utils.Utils;
 
 class TapasticEpisode {
-    protected int index, id;
-    protected String title, filename;
+    int id;
+    String filename;
     public TapasticEpisode(int index, int id, String title) {
-        this.index = index;
+        int index1 = index;
         this.id = id;
-        this.title = title;
+        String title1 = title;
         this.filename = Utils.filesystemSafe(title);
     }
 }
 
 public class TapasticRipper extends AbstractHTMLRipper {
 
-    private List<TapasticEpisode> episodes=new ArrayList<TapasticEpisode>();
+    private List<TapasticEpisode> episodes= new ArrayList<>();
 
     public TapasticRipper(URL url) throws IOException {
         super(url);
@@ -38,12 +38,12 @@ public class TapasticRipper extends AbstractHTMLRipper {
 
     @Override
     public String getDomain() {
-        return "tapastic.com";
+        return "tapas.io";
     }
 
     @Override
     public String getHost() {
-        return "tapastic";
+        return "tapas";
     }
 
     @Override
@@ -53,7 +53,7 @@ public class TapasticRipper extends AbstractHTMLRipper {
 
     @Override
     public List<String> getURLsFromPage(Document page) {
-        List<String> urls = new ArrayList<String>();
+        List<String> urls = new ArrayList<>();
         String html = page.data();
         if (!html.contains("episodeList : ")) {
             logger.error("No 'episodeList' found at " + this.url);
@@ -100,12 +100,12 @@ public class TapasticRipper extends AbstractHTMLRipper {
 
     @Override
     public String getGID(URL url) throws MalformedURLException {
-        Pattern p = Pattern.compile("^http://tapastic.com/series/([^/?]+).*$");
+        Pattern p = Pattern.compile("^https?://tapas.io/series/([^/?]+).*$");
         Matcher m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             return "series_ " + m.group(1);
         }
-        p = Pattern.compile("^http://tapastic.com/episode/([^/?]+).*$");
+        p = Pattern.compile("^https?://tapas.io/episode/([^/?]+).*$");
         m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             return "ep_" + m.group(1);
@@ -3,27 +3,18 @@ package com.rarchives.ripme.ripper.rippers;
 
 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
 import com.rarchives.ripme.utils.Http;
-import com.rarchives.ripme.utils.Utils;
-import java.io.File;
 import java.io.IOException;
 import java.net.MalformedURLException;
-import java.net.URI;
-import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
-import java.util.logging.Level;
-import java.util.logging.Logger;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
 
 public class ThechiveRipper extends AbstractHTMLRipper {
-    public static boolean isTag;
 
     public ThechiveRipper(URL url) throws IOException {
         super(url);
@@ -44,7 +35,7 @@ public class ThechiveRipper extends AbstractHTMLRipper {
         Pattern p = Pattern.compile("^https?://thechive.com/[0-9]*/[0-9]*/[0-9]*/([a-zA-Z0-9_\\-]*)/?$");
         Matcher m = p.matcher(url.toExternalForm());
         if (m.matches()) {
-            isTag = false;
+            boolean isTag = false;
             return m.group(1);
         }
         throw new MalformedURLException("Expected thechive.com URL format: " +
@@ -59,7 +50,7 @@ public class ThechiveRipper extends AbstractHTMLRipper {
 
     @Override
     public List<String> getURLsFromPage(Document doc) {
-        List<String> result = new ArrayList<String>();
+        List<String> result = new ArrayList<>();
         for (Element el : doc.select("img.attachment-gallery-item-full")) {
             String imageSource = el.attr("src");
             // We replace thumbs with resizes so we can the full sized images
@@ -0,0 +1,75 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+
+public class TheyiffgalleryRipper extends AbstractHTMLRipper {
+
+    public TheyiffgalleryRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return "theyiffgallery";
+    }
+
+    @Override
+    public String getDomain() {
+        return "theyiffgallery.com";
+    }
+
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        Pattern p = Pattern.compile("https?://theyiffgallery.com/index\\?/category/(\\d+)");
+        Matcher m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return m.group(1);
+        }
+        throw new MalformedURLException("Expected theyiffgallery URL format: " +
+                "theyiffgallery.com/index?/category/#### - got " + url + " instead");
+    }
+
+    @Override
+    public Document getFirstPage() throws IOException {
+        // "url" is an instance field of the superclass
+        return Http.url(url).get();
+    }
+
+    @Override
+    public Document getNextPage(Document doc) throws IOException {
+        String nextPage = doc.select("span.navPrevNext > a").attr("href");
+        if (nextPage != null && !nextPage.isEmpty() && nextPage.contains("start-")) {
+            return Http.url("https://theyiffgallery.com/" + nextPage).get();
+        }
+        throw new IOException("No more pages");
+    }
+
+    @Override
+    public List<String> getURLsFromPage(Document doc) {
+        List<String> result = new ArrayList<>();
+        for (Element el : doc.select("ul.thumbnails > li.gdthumb")) {
+            String imageSource = el.select("a > img").attr("src");
+            imageSource = imageSource.replaceAll("_data/i", "");
+            imageSource = imageSource.replaceAll("-\\w\\w_\\w\\d+x\\d+", "");
+            result.add("https://theyiffgallery.com" + imageSource);
+        }
+        return result;
+    }
+
+    @Override
+    public void downloadURL(URL url, int index) {
+        addURLToDownload(url, getPrefix(index));
+    }
+}
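Note: the new TheyiffgalleryRipper derives full-size URLs from thumbnail src attributes purely by string rewriting — dropping the Piwigo-style `_data/i` cache segment and the size suffix. A standalone sketch of that rewrite; the sample path below is made up for illustration and only shaped like the gallery's real ones:

    class YiffGalleryUrlRewrite {
        // Same two replaceAll calls as in getURLsFromPage above.
        static String fullSize(String thumbSrc) {
            String s = thumbSrc.replaceAll("_data/i", "");
            s = s.replaceAll("-\\w\\w_\\w\\d+x\\d+", "");
            return "https://theyiffgallery.com" + s;
        }

        public static void main(String[] args) {
            // Hypothetical thumbnail path: cache segment plus "-cu_s120x120" size suffix.
            System.out.println(fullSize("_data/i/upload/2017/01/pic-cu_s120x120.jpg"));
            // -> https://theyiffgallery.com/upload/2017/01/pic.jpg
        }
    }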
@@ -4,6 +4,9 @@ import java.io.IOException;
 import java.net.HttpURLConnection;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Random;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -34,18 +37,19 @@ public class TumblrRipper extends AlbumRipper {
     private static String TUMBLR_AUTH_CONFIG_KEY = "tumblr.auth";
 
     private static boolean useDefaultApiKey = false; // fall-back for bad user-specified key
-    private static final String DEFAULT_API_KEY = "JFNLu3CbINQjRdUvZibXW9VpSEVYYtiPJ86o8YmvgLZIoKyuNX";
-
-    private static final String API_KEY;
-    static {
-        API_KEY = Utils.getConfigString(TUMBLR_AUTH_CONFIG_KEY, DEFAULT_API_KEY);
-    }
+    private static final List<String> apiKeys = Arrays.asList("JFNLu3CbINQjRdUvZibXW9VpSEVYYtiPJ86o8YmvgLZIoKyuNX",
+            "FQrwZMCxVnzonv90rgNUJcAk4FpnoS0mYuSuGYqIpM2cFgp9L4",
+            "qpdkY6nMknksfvYAhf2xIHp0iNRLkMlcWShxqzXyFJRxIsZ1Zz");
+    private static final String API_KEY = apiKeys.get(new Random().nextInt(apiKeys.size()));
 
     private static String getApiKey() {
-        if (useDefaultApiKey) {
-            return DEFAULT_API_KEY;
+        if (useDefaultApiKey || Utils.getConfigString(TUMBLR_AUTH_CONFIG_KEY, "JFNLu3CbINQjRdUvZibXW9VpSEVYYtiPJ86o8YmvgLZIoKyuNX").equals("JFNLu3CbINQjRdUvZibXW9VpSEVYYtiPJ86o8YmvgLZIoKyuNX")) {
+            logger.info("Using api key: " + API_KEY);
+            return API_KEY;
         } else {
-            return API_KEY;
+            logger.info("Using user tumblr.auth api key");
+            return Utils.getConfigString(TUMBLR_AUTH_CONFIG_KEY, "JFNLu3CbINQjRdUvZibXW9VpSEVYYtiPJ86o8YmvgLZIoKyuNX");
         }
     }
 
@@ -77,7 +81,7 @@ public class TumblrRipper extends AlbumRipper {
         return url;
     }
 
-    public boolean isTumblrURL(URL url) {
+    private boolean isTumblrURL(URL url) {
         String checkURL = "http://api.tumblr.com/v2/blog/";
         checkURL += url.getHost();
         checkURL += "/info?api_key=" + getApiKey();
@@ -95,6 +99,7 @@ public class TumblrRipper extends AlbumRipper {
     @Override
     public void rip() throws IOException {
         String[] mediaTypes;
+        boolean exceededRateLimit = false;
         if (albumType == ALBUM_TYPE.POST) {
             mediaTypes = new String[] { "post" };
         } else {
@@ -105,12 +110,21 @@ public class TumblrRipper extends AlbumRipper {
             if (isStopped()) {
                 break;
             }
 
+            if (exceededRateLimit) {
+                break;
+            }
+
             offset = 0;
             while (true) {
                 if (isStopped()) {
                     break;
                 }
 
+                if (exceededRateLimit) {
+                    break;
+                }
+
                 String apiURL = getTumblrApiURL(mediaType, offset);
                 logger.info("Retrieving " + apiURL);
                 sendUpdate(STATUS.LOADING_RESOURCE, apiURL);
@@ -126,6 +140,11 @@ public class TumblrRipper extends AlbumRipper {
                     HttpStatusException status = (HttpStatusException)cause;
                     if (status.getStatusCode() == HttpURLConnection.HTTP_UNAUTHORIZED && !useDefaultApiKey) {
                         retry = true;
+                    } else if (status.getStatusCode() == 429) {
+                        logger.error("Tumblr rate limit has been exceeded");
+                        sendUpdate(STATUS.DOWNLOAD_ERRORED,"Tumblr rate limit has been exceeded");
+                        exceededRateLimit = true;
+                        break;
                     }
                 }
             }
@@ -192,7 +211,14 @@ public class TumblrRipper extends AlbumRipper {
                 for (int j = 0; j < photos.length(); j++) {
                     photo = photos.getJSONObject(j);
                     try {
-                        fileURL = new URL(photo.getJSONObject("original_size").getString("url"));
+                        if (Utils.getConfigBoolean("tumblr.get_raw_image", false)) {
+                            String urlString = photo.getJSONObject("original_size").getString("url").replaceAll("https", "http");
+                            urlString = urlString.replaceAll("https?://[a-sA-Z0-9_\\-\\.]*\\.tumblr", "http://data.tumblr");
+                            urlString = urlString.replaceAll("_\\d+\\.", "_raw.");
+                            fileURL = new URL(urlString);
+                        } else {
+                            fileURL = new URL(photo.getJSONObject("original_size").getString("url").replaceAll("http", "https"));
+                        }
                         m = p.matcher(fileURL.toString());
                         if (m.matches()) {
                             addURLToDownload(fileURL);
@@ -202,12 +228,11 @@ public class TumblrRipper extends AlbumRipper {
                         }
                     } catch (Exception e) {
                         logger.error("[!] Error while parsing photo in " + photo, e);
-                        continue;
                     }
                 }
             } else if (post.has("video_url")) {
                 try {
-                    fileURL = new URL(post.getString("video_url"));
+                    fileURL = new URL(post.getString("video_url").replaceAll("http", "https"));
                     addURLToDownload(fileURL);
                 } catch (Exception e) {
                     logger.error("[!] Error while parsing video in " + post, e);
@@ -254,7 +279,7 @@ public class TumblrRipper extends AlbumRipper {
 
     @Override
     public String getGID(URL url) throws MalformedURLException {
-        final String DOMAIN_REGEX = "^https?://([a-zA-Z0-9\\-\\.]+)";
+        final String DOMAIN_REGEX = "^https?://([a-zA-Z0-9\\-.]+)";
 
         Pattern p;
         Matcher m;
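Note on the TumblrRipper changes above: the single configured API key becomes a pool of three, one of which is picked once at class-load time with Random, and an HTTP 429 response now aborts the rip (via the exceededRateLimit flag checked in both loops) instead of retrying forever. A compact sketch of that select-once / bail-on-429 pattern in isolation — class, method names, and placeholder keys here are illustrative, not from the commit:

    import java.util.Arrays;
    import java.util.List;
    import java.util.Random;

    class ApiKeyPool {
        private static final List<String> KEYS = Arrays.asList("key-a", "key-b", "key-c"); // placeholders
        // Chosen once per JVM, like TumblrRipper.API_KEY above.
        static final String KEY = KEYS.get(new Random().nextInt(KEYS.size()));

        // Mirrors the new 429 handling: flag the rip as rate-limited and stop paging.
        static boolean shouldAbort(int httpStatus) {
            return httpStatus == 429; // Too Many Requests
        }
    }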
@@ -54,14 +54,14 @@ public class TwitterRipper extends AlbumRipper {
     @Override
     public URL sanitizeURL(URL url) throws MalformedURLException {
         // https://twitter.com/search?q=from%3Apurrbunny%20filter%3Aimages&src=typd
-        Pattern p = Pattern.compile("^https?://(m\\.)?twitter\\.com/search\\?q=([a-zA-Z0-9%\\-_]{1,}).*$");
+        Pattern p = Pattern.compile("^https?://(m\\.)?twitter\\.com/search\\?q=([a-zA-Z0-9%\\-_]+).*$");
         Matcher m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             albumType = ALBUM_TYPE.SEARCH;
             searchText = m.group(2);
             return url;
         }
-        p = Pattern.compile("^https?://(m\\.)?twitter\\.com/([a-zA-Z0-9\\-_]{1,}).*$");
+        p = Pattern.compile("^https?://(m\\.)?twitter\\.com/([a-zA-Z0-9\\-_]+).*$");
         m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             albumType = ALBUM_TYPE.ACCOUNT;
@@ -83,7 +83,6 @@ public class TwitterRipper extends AlbumRipper {
         try {
             JSONObject json = new JSONObject(body);
             accessToken = json.getString("access_token");
-            return;
         } catch (JSONException e) {
             // Fall through
             throw new IOException("Failure while parsing JSON: " + body, e);
@@ -142,7 +141,7 @@ public class TwitterRipper extends AlbumRipper {
     }
 
     private List<JSONObject> getTweets(String url) throws IOException {
-        List<JSONObject> tweets = new ArrayList<JSONObject>();
+        List<JSONObject> tweets = new ArrayList<>();
         logger.info("    Retrieving " + url);
         Document doc = Http.url(url)
                 .ignoreContentType()
@@ -283,7 +282,6 @@ public class TwitterRipper extends AlbumRipper {
             if (c == '%') {
                 gid.append('_');
                 i += 2;
-                continue;
             // Ignore non-alphanumeric chars
             } else if (
                 (c >= 'a' && c <= 'z')
@@ -22,7 +22,7 @@ import com.rarchives.ripme.utils.Http;
 public class TwodgalleriesRipper extends AbstractHTMLRipper {
 
     private int offset = 0;
-    private Map<String,String> cookies = new HashMap<String,String>();
+    private Map<String,String> cookies = new HashMap<>();
 
     public TwodgalleriesRipper(URL url) throws IOException {
         super(url);
@@ -90,7 +90,7 @@ public class TwodgalleriesRipper extends AbstractHTMLRipper {
 
     @Override
     public List<String> getURLsFromPage(Document doc) {
-        List<String> imageURLs = new ArrayList<String>();
+        List<String> imageURLs = new ArrayList<>();
         for (Element thumb : doc.select("div.hcaption > img")) {
             String image = thumb.attr("src");
             image = image.replace("/200H/", "/");
@@ -114,7 +114,7 @@ public class TwodgalleriesRipper extends AbstractHTMLRipper {
         cookies = resp.cookies();
         String ctoken = resp.parse().select("form > input[name=ctoken]").first().attr("value");
 
-        Map<String,String> postdata = new HashMap<String,String>();
+        Map<String,String> postdata = new HashMap<>();
         postdata.put("user[login]", new String(Base64.decode("cmlwbWU=")));
         postdata.put("user[password]", new String(Base64.decode("cmlwcGVy")));
         postdata.put("rememberme", "1");
@@ -56,7 +56,7 @@ public class VidbleRipper extends AbstractHTMLRipper {
     }
 
     private static List<String> getURLsFromPageStatic(Document doc) {
-        List<String> imageURLs = new ArrayList<String>();
+        List<String> imageURLs = new ArrayList<>();
         Elements els = doc.select("#ContentPlaceHolder1_divContent");
         Elements imgs = els.select("img");
         for (Element img : imgs) {
@@ -76,7 +76,7 @@ public class VidbleRipper extends AbstractHTMLRipper {
     }
 
     public static List<URL> getURLsFromPage(URL url) throws IOException {
-        List<URL> urls = new ArrayList<URL>();
+        List<URL> urls = new ArrayList<>();
         Document doc = Http.url(url).get();
         for (String stringURL : getURLsFromPageStatic(doc)) {
             urls.add(new URL(stringURL));
@@ -0,0 +1,84 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+
+public class ViewcomicRipper extends AbstractHTMLRipper {
+
+    public ViewcomicRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return "view-comic";
+    }
+
+    @Override
+    public String getDomain() {
+        return "view-comic.com";
+    }
+
+    @Override
+    public String getAlbumTitle(URL url) throws MalformedURLException {
+        try {
+            // Attempt to use album title as GID
+            String titleText = getFirstPage().select("title").first().text();
+            String title = titleText.replace("Viewcomic reading comics online for free", "");
+            title = title.replace("_", "");
+            title = title.replace("|", "");
+            title = title.replace("…", "");
+            title = title.replace(".", "");
+            return getHost() + "_" + title.trim();
+        } catch (IOException e) {
+            // Fall back to default album naming convention
+            logger.info("Unable to find title at " + url);
+        }
+        return super.getAlbumTitle(url);
+    }
+
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        Pattern p = Pattern.compile("https?://view-comic.com/([a-zA-Z1-9_-]*)/?$");
+        Matcher m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return m.group(1);
+        }
+        throw new MalformedURLException("Expected view-comic URL format: " +
+                "view-comic.com/COMIC_NAME - got " + url + " instead");
+    }
+
+    @Override
+    public Document getFirstPage() throws IOException {
+        // "url" is an instance field of the superclass
+        return Http.url(url).get();
+    }
+
+    @Override
+    public List<String> getURLsFromPage(Document doc) {
+        List<String> result = new ArrayList<String>();
+        for (Element el : doc.select("div.pinbin-copy > a > img")) {
+            result.add(el.attr("src"));
+        }
+        return result;
+    }
+
+    @Override
+    public void downloadURL(URL url, int index) {
+        addURLToDownload(url, getPrefix(index));
+    }
+}
@@ -84,7 +84,7 @@ public class VineRipper extends AlbumRipper {
 
     @Override
     public String getGID(URL url) throws MalformedURLException {
-        Pattern p = Pattern.compile("^https?://(www\\.)?vine\\.co/u/([0-9]{1,}).*$");
+        Pattern p = Pattern.compile("^https?://(www\\.)?vine\\.co/u/([0-9]+).*$");
         Matcher m = p.matcher(url.toExternalForm());
         if (!m.matches()) {
             throw new MalformedURLException("Expected format: http://vine.co/u/######");
@@ -37,11 +37,7 @@ public class VkRipper extends AlbumRipper {
         }
         // Ignore /video pages (but not /videos pages)
         String u = url.toExternalForm();
-        if (u.contains("/video") && !u.contains("videos")) {
-            // Single video page
-            return false;
-        }
-        return true;
+        return !u.contains("/video") || u.contains("videos");
     }
 
     @Override
@@ -62,7 +58,7 @@ public class VkRipper extends AlbumRipper {
     private void ripVideos() throws IOException {
         String oid = getGID(this.url).replace("videos", "");
         String u = "http://vk.com/al_video.php";
-        Map<String,String> postData = new HashMap<String,String>();
+        Map<String,String> postData = new HashMap<>();
         postData.put("al", "1");
         postData.put("act", "load_videos_silent");
         postData.put("offset", "0");
@@ -97,13 +93,13 @@ public class VkRipper extends AlbumRipper {
     }
 
     private void ripImages() throws IOException {
-        Map<String,String> photoIDsToURLs = new HashMap<String,String>();
+        Map<String,String> photoIDsToURLs = new HashMap<>();
         int offset = 0;
         while (true) {
             logger.info("    Retrieving " + this.url);
 
             // al=1&offset=80&part=1
-            Map<String,String> postData = new HashMap<String,String>();
+            Map<String,String> postData = new HashMap<>();
             postData.put("al", "1");
             postData.put("offset", Integer.toString(offset));
             postData.put("part", "1");
@@ -120,7 +116,7 @@ public class VkRipper extends AlbumRipper {
             body = body.substring(body.indexOf("<div"));
             doc = Jsoup.parseBodyFragment(body);
             List<Element> elements = doc.select("a");
-            Set<String> photoIDsToGet = new HashSet<String>();
+            Set<String> photoIDsToGet = new HashSet<>();
             for (Element a : elements) {
                 if (!a.attr("onclick").contains("showPhoto('")) {
                     logger.error("a: " + a);
@@ -162,8 +158,8 @@ public class VkRipper extends AlbumRipper {
     }
 
     private Map<String,String> getPhotoIDsToURLs(String photoID) throws IOException {
-        Map<String,String> photoIDsToURLs = new HashMap<String,String>();
-        Map<String,String> postData = new HashMap<String,String>();
+        Map<String,String> photoIDsToURLs = new HashMap<>();
+        Map<String,String> postData = new HashMap<>();
         // act=show&al=1&list=album45506334_172415053&module=photos&photo=45506334_304658196
         postData.put("list", getGID(this.url));
         postData.put("act", "show");
@@ -202,7 +198,7 @@ public class VkRipper extends AlbumRipper {
 
     @Override
     public String getGID(URL url) throws MalformedURLException {
-        Pattern p = Pattern.compile("^https?://(www\\.)?vk\\.com/(photos|album|videos)-?([a-zA-Z0-9_]{1,}).*$");
+        Pattern p = Pattern.compile("^https?://(www\\.)?vk\\.com/(photos|album|videos)-?([a-zA-Z0-9_]+).*$");
         Matcher m = p.matcher(url.toExternalForm());
         if (!m.matches()) {
             throw new MalformedURLException("Expected format: http://vk.com/album#### or vk.com/photos####");
@@ -0,0 +1,102 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import java.util.Map;
+import java.util.HashMap;
+import org.jsoup.Connection.Response;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+
+public class WebtoonsRipper extends AbstractHTMLRipper {
+    private Map<String,String> cookies = new HashMap<String,String>();
+
+    public WebtoonsRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return "webtoons";
+    }
+
+    @Override
+    public String getDomain() {
+        return "www.webtoons.com";
+    }
+
+    @Override
+    public boolean canRip(URL url) {
+        Pattern pat = Pattern.compile("https?://www.webtoons.com/[a-zA-Z]+/[a-zA-Z]+/([a-zA-Z0-9_-]*)/[a-zA-Z0-9_-]+/\\S*");
+        Matcher mat = pat.matcher(url.toExternalForm());
+        if (mat.matches()) {
+            return true;
+        }
+        return false;
+    }
+
+    @Override
+    public String getAlbumTitle(URL url) throws MalformedURLException {
+        Pattern pat = Pattern.compile("https?://www.webtoons.com/[a-zA-Z]+/[a-zA-Z]+/([a-zA-Z0-9_-]*)/[a-zA-Z0-9_-]+/\\S*");
+        Matcher mat = pat.matcher(url.toExternalForm());
+        if (mat.matches()) {
+            return getHost() + "_" + mat.group(1);
+        }
+        return super.getAlbumTitle(url);
+    }
+
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        Pattern pat = Pattern.compile("https?://www.webtoons.com/[a-zA-Z]+/[a-zA-Z]+/([a-zA-Z0-9_-]*)/[a-zA-Z0-9_-]+/\\S*");
+        Matcher mat = pat.matcher(url.toExternalForm());
+        if (mat.matches()) {
+            return mat.group(1);
+        }
+        throw new MalformedURLException("Expected URL format: http://www.webtoons.com/LANG/CAT/TITLE/VOL/, got: " + url);
+    }
+
+    @Override
+    public List<String> getURLsFromPage(Document doc) {
+        List<String> result = new ArrayList<String>();
+        for (Element elem : doc.select("div.viewer_img > img")) {
+            result.add(elem.attr("data-url"));
+        }
+        return result;
+    }
+
+    @Override
+    public void downloadURL(URL url, int index) {
+        addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), cookies);
+    }
+
+    @Override
+    public Document getFirstPage() throws IOException {
+        Response resp = Http.url(url).response();
+        cookies = resp.cookies();
+        return Http.url(url).get();
+    }
+
+    @Override
+    public Document getNextPage(Document doc) throws IOException {
+        // Find next page
+        String nextUrl = "";
+        Element elem = doc.select("a.pg_next").first();
+        nextUrl = elem.attr("href");
+        if (nextUrl.equals("") || nextUrl.equals("#")) {
+            throw new IOException("No more pages");
+        }
+        return Http.url(nextUrl).get();
+    }
+}
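Note: the new WebtoonsRipper captures the session cookies in getFirstPage and hands both the cookies and the episode URL (as referrer) to addURLToDownload. The likely reason — an assumption, not stated in the commit — is that the image CDN rejects requests that don't carry the episode page as Referer. A minimal jsoup sketch of a fetch shaped that way (class and method names are illustrative):

    import java.io.IOException;
    import java.util.Map;
    import org.jsoup.Connection;
    import org.jsoup.Jsoup;

    class RefererFetchSketch {
        // Fetch an image while replaying session cookies and presenting the
        // episode page as Referer, mirroring what downloadURL passes along above.
        static byte[] fetchImage(String imageUrl, String episodeUrl, Map<String, String> cookies) throws IOException {
            Connection.Response resp = Jsoup.connect(imageUrl)
                    .cookies(cookies)
                    .referrer(episodeUrl)
                    .ignoreContentType(true)  // the response is an image, not HTML
                    .execute();
            return resp.bodyAsBytes();
        }
    }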
@@ -16,25 +16,25 @@ import com.rarchives.ripme.ripper.AbstractHTMLRipper;
 import com.rarchives.ripme.utils.Http;
 
 public class WordpressComicRipper extends AbstractHTMLRipper {
-    String pageTitle = "";
+    private String pageTitle = "";
 
     public WordpressComicRipper(URL url) throws IOException {
         super(url);
     }
 
-    // Test links:
+    // Test links (see also WordpressComicRipperTest.java)
     // http://www.totempole666.com/comic/first-time-for-everything-00-cover/
     // http://buttsmithy.com/archives/comic/p1
     // http://themonsterunderthebed.net/?comic=test-post
     // http://prismblush.com/comic/hella-trap-pg-01/
-    // http://www.konradokonski.com/sawdust/
-    // http://www.konradokonski.com/wiory/
+    // http://www.konradokonski.com/sawdust/comic/get-up/
+    // http://www.konradokonski.com/wiory/comic/08182008/
     // http://freeadultcomix.com/finders-feepaid-in-full-sparrow/
-    // http://comics-xxx.com/republic-rendezvous-palcomix-star-wars-xxx/
+    // http://thisis.delvecomic.com/NewWP/comic/in-too-deep/
     // http://tnbtu.com/comic/01-00/
     // http://shipinbottle.pepsaga.com/?p=281
 
-    public static List<String> explicit_domains = Arrays.asList(
+    private static List<String> explicit_domains = Arrays.asList(
         "www.totempole666.com",
         "buttsmithy.com",
         "themonsterunderthebed.net",
@@ -42,21 +42,18 @@ public class WordpressComicRipper extends AbstractHTMLRipper {
         "www.konradokonski.com",
         "freeadultcomix.com",
         "thisis.delvecomic.com",
-        "comics-xxx.com",
         "tnbtu.com",
         "shipinbottle.pepsaga.com"
     );
 
     @Override
     public String getHost() {
-        String host = url.toExternalForm().split("/")[2];
-        return host;
+        return url.toExternalForm().split("/")[2];
     }
 
     @Override
     public String getDomain() {
-        String host = url.toExternalForm().split("/")[2];
-        return host;
+        return url.toExternalForm().split("/")[2];
     }
 
     @Override
@@ -70,12 +67,20 @@ public class WordpressComicRipper extends AbstractHTMLRipper {
             return true;
         }
 
-        Pattern konradokonskiPat = Pattern.compile("https?://www.konradokonski.com/sawdust/comic/([a-zA-Z0-9_-]*)/?$");
+        Pattern konradokonskiPat = Pattern.compile("https?://www.konradokonski.com/([a-zA-Z0-9_-]*)/comic/([a-zA-Z0-9_-]*)/?$");
         Matcher konradokonskiMat = konradokonskiPat.matcher(url.toExternalForm());
         if (konradokonskiMat.matches()) {
             return true;
         }
 
+        // This is hardcoded because it starts on the first page, unlike all the other
+        // konradokonski which start on the last page
+        konradokonskiPat = Pattern.compile("https?://www.konradokonski.com/aquartzbead/?$");
+        konradokonskiMat = konradokonskiPat.matcher(url.toExternalForm());
+        if (konradokonskiMat.matches()) {
+            return true;
+        }
+
         Pattern buttsmithyPat = Pattern.compile("https?://buttsmithy.com/archives/comic/([a-zA-Z0-9_-]*)/?$");
         Matcher buttsmithyMat = buttsmithyPat.matcher(url.toExternalForm());
         if (buttsmithyMat.matches()) {
@@ -125,12 +130,13 @@ public class WordpressComicRipper extends AbstractHTMLRipper {
             }
         }
 
+
         return false;
     }
 
     @Override
     public String getAlbumTitle(URL url) throws MalformedURLException {
-        Pattern totempole666Pat = Pattern.compile("(?:https?://)?(?:www\\.)?totempole666.com\\/comic/([a-zA-Z0-9_-]*)/?$");
+        Pattern totempole666Pat = Pattern.compile("(?:https?://)?(?:www\\.)?totempole666.com/comic/([a-zA-Z0-9_-]*)/?$");
         Matcher totempole666Mat = totempole666Pat.matcher(url.toExternalForm());
         if (totempole666Mat.matches()) {
             return "totempole666.com" + "_" + "The_cummoner";
@@ -142,16 +148,16 @@ public class WordpressComicRipper extends AbstractHTMLRipper {
             return "buttsmithy.com" + "_" + "Alfie";
         }
 
-        Pattern konradokonskiSawdustPat = Pattern.compile("http://www.konradokonski.com/sawdust/comic/([a-zA-Z0-9_-]*)/?$");
-        Matcher konradokonskiSawdustMat = konradokonskiSawdustPat.matcher(url.toExternalForm());
-        if (konradokonskiSawdustMat.matches()) {
-            return "konradokonski.com_sawdust";
+        Pattern konradokonskiPat = Pattern.compile("http://www.konradokonski.com/([a-zA-Z]+)/comic/([a-zA-Z0-9_-]*)/?$");
+        Matcher konradokonskiMat = konradokonskiPat.matcher(url.toExternalForm());
+        if (konradokonskiMat.matches()) {
+            return "konradokonski.com_" + konradokonskiMat.group(1);
         }
 
-        Pattern konradokonskiWioryPat = Pattern.compile("http://www.konradokonski.com/wiory/comic/([a-zA-Z0-9_-]*)/?$");
-        Matcher konradokonskiWioryMat = konradokonskiWioryPat.matcher(url.toExternalForm());
-        if (konradokonskiWioryMat.matches()) {
-            return "konradokonski.com_wiory";
+        konradokonskiPat = Pattern.compile("https?://www.konradokonski.com/aquartzbead/?$");
+        konradokonskiMat = konradokonskiPat.matcher(url.toExternalForm());
+        if (konradokonskiMat.matches()) {
+            return "konradokonski.com_aquartzbead";
         }
 
         Pattern freeadultcomixPat = Pattern.compile("https?://freeadultcomix.com/([a-zA-Z0-9_\\-]*)/?$");
@@ -237,7 +243,7 @@ public class WordpressComicRipper extends AbstractHTMLRipper {
 
     @Override
     public List<String> getURLsFromPage(Document doc) {
-        List<String> result = new ArrayList<String>();
+        List<String> result = new ArrayList<>();
         if (getHost().contains("www.totempole666.com")
                 || getHost().contains("buttsmithy.com")
                 || getHost().contains("themonsterunderthebed.net")
@@ -277,9 +283,10 @@ public class WordpressComicRipper extends AbstractHTMLRipper {
         }
 
         // freeadultcomix gets it own if because it needs to add http://freeadultcomix.com to the start of each link
+        // TODO review the above comment which no longer applies -- see if there's a refactoring we should do here.
        if (url.toExternalForm().contains("freeadultcomix.com")) {
             for (Element elem : doc.select("div.single-post > p > img.aligncenter")) {
-                result.add("http://freeadultcomix.com" + elem.attr("src"));
+                result.add(elem.attr("src"));
             }
         }
 
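Note: the two copy-pasted sawdust/wiory branches in getAlbumTitle collapse into one pattern whose first capture group is the comic name. A quick standalone check of the generalized regex (class and example URLs are illustrative, taken from the test links in the diff):

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    class KonradokonskiTitleCheck {
        public static void main(String[] args) {
            Pattern p = Pattern.compile("http://www.konradokonski.com/([a-zA-Z]+)/comic/([a-zA-Z0-9_-]*)/?$");
            String[] urls = {
                "http://www.konradokonski.com/sawdust/comic/get-up/",
                "http://www.konradokonski.com/wiory/comic/08182008/"
            };
            for (String u : urls) {
                Matcher m = p.matcher(u);
                if (m.matches()) {
                    // group(1) is the comic name: sawdust, wiory, ...
                    System.out.println("konradokonski.com_" + m.group(1));
                }
            }
        }
    }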
@ -10,14 +10,15 @@ import java.net.URISyntaxException;
|
|||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.logging.Level;
|
|
||||||
import java.util.logging.Logger;
|
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
|
|
||||||
public class XbooruRipper extends AbstractHTMLRipper {
|
public class XbooruRipper extends AbstractHTMLRipper {
|
||||||
|
private static final Logger logger = Logger.getLogger(XbooruRipper.class);
|
||||||
|
|
||||||
private static Pattern gidPattern = null;
|
private static Pattern gidPattern = null;
|
||||||
|
|
||||||
public XbooruRipper(URL url) throws IOException {
|
public XbooruRipper(URL url) throws IOException {
|
||||||
@ -57,7 +58,7 @@ public class XbooruRipper extends AbstractHTMLRipper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<String> getURLsFromPage(Document page) {
|
public List<String> getURLsFromPage(Document page) {
|
||||||
List<String> res = new ArrayList<String>(100);
|
List<String> res = new ArrayList<>(100);
|
||||||
for (Element e : page.getElementsByTag("post")) {
|
for (Element e : page.getElementsByTag("post")) {
|
||||||
res.add(e.absUrl("file_url") + "#" + e.attr("id"));
|
res.add(e.absUrl("file_url") + "#" + e.attr("id"));
|
||||||
}
|
}
|
||||||
@ -71,7 +72,7 @@ public class XbooruRipper extends AbstractHTMLRipper {
|
|||||||
|
|
||||||
private String getTerm(URL url) throws MalformedURLException {
|
private String getTerm(URL url) throws MalformedURLException {
|
||||||
if (gidPattern == null) {
|
if (gidPattern == null) {
|
||||||
gidPattern = Pattern.compile("^https?://(www\\.)?xbooru\\.com/(index.php)?.*([?&]tags=([a-zA-Z0-9$_.+!*'(),%-]+))(\\&|(#.*)?$)");
|
gidPattern = Pattern.compile("^https?://(www\\.)?xbooru\\.com/(index.php)?.*([?&]tags=([a-zA-Z0-9$_.+!*'(),%-]+))(&|(#.*)?$)");
|
||||||
}
|
}
|
||||||
|
|
||||||
Matcher m = gidPattern.matcher(url.toExternalForm());
|
Matcher m = gidPattern.matcher(url.toExternalForm());
|
||||||
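Aside on the gidPattern change above: & is not a metacharacter in java.util.regex, so \\& and & match the same literal ampersand; the edit merely drops a redundant escape. A quick self-contained check:

    class AmpEscapeCheck {
        public static void main(String[] args) {
            // Both forms match a literal '&'; the escape changes nothing.
            System.out.println("a&b".matches("a\\&b")); // true
            System.out.println("a&b".matches("a&b"));   // true
        }
    }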
@@ -87,7 +88,7 @@ public class XbooruRipper extends AbstractHTMLRipper {
         try {
             return Utils.filesystemSafe(new URI(getTerm(url)).getPath());
         } catch (URISyntaxException ex) {
-            Logger.getLogger(PahealRipper.class.getName()).log(Level.SEVERE, null, ex);
+            logger.error(ex);
         }

         throw new MalformedURLException("Expected xbooru.com URL format: xbooru.com/index.php?tags=searchterm - got " + url + " instead");
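The logging change above does two things: it moves XbooruRipper from java.util.logging to log4j, and it fixes what looks like a copy-paste slip, since the old line logged under PahealRipper's name. A minimal sketch of the per-class log4j 1.x idiom the commit adopts (class name and message are illustrative):

    import org.apache.log4j.Logger;

    class LoggerIdiomSketch {
        // One static logger per class, keyed by the class itself, so log output
        // is attributed to the class that actually emitted it.
        private static final Logger logger = Logger.getLogger(LoggerIdiomSketch.class);

        public static void main(String[] args) {
            try {
                throw new IllegalStateException("demo failure");
            } catch (IllegalStateException ex) {
                // error(Object) logs the throwable's toString(); use the two-argument
                // form error("msg", ex) if the stack trace is wanted as well.
                logger.error(ex);
            }
        }
    }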
src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java
@@ -3,91 +3,46 @@ package com.rarchives.ripme.ripper.rippers;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;

-import com.rarchives.ripme.ripper.AlbumRipper;
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
 import com.rarchives.ripme.utils.Http;
-import com.rarchives.ripme.utils.Utils;

-public class XhamsterRipper extends AlbumRipper {
+public class XhamsterRipper extends AbstractHTMLRipper {

-    private static final String HOST = "xhamster";

     public XhamsterRipper(URL url) throws IOException {
         super(url);
     }

     @Override
-    public boolean canRip(URL url) {
-        Pattern p = Pattern.compile("^https?://[wmde.]*xhamster\\.com/photos/gallery/.*[0-9]+$");
-        Matcher m = p.matcher(url.toExternalForm());
-        return m.matches();
+    public String getHost() {
+        return "xhamster";
+    }
+
+    @Override
+    public String getDomain() {
+        return "xhamster.com";
     }

     @Override
     public URL sanitizeURL(URL url) throws MalformedURLException {
-        return url;
-    }
-
-    @Override
-    public void rip() throws IOException {
-        int index = 0;
+        String URLToReturn = url.toExternalForm();
+        URLToReturn = URLToReturn.replaceAll("m.xhamster.com", "xhamster.com");
+        URLToReturn = URLToReturn.replaceAll("\\w\\w.xhamster.com", "xhamster.com");
+        URL san_url = new URL(URLToReturn.replaceAll("xhamster.com", "m.xhamster.com"));
+        logger.info("sanitized URL is " + san_url.toExternalForm());
+        return san_url;
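A note on the new sanitizeURL above: the three replaceAll calls funnel every mirror hostname onto the mobile site; m.xhamster.com and two-letter country mirrors are first normalized to xhamster.com, which is then rewritten to m.xhamster.com. Since replaceAll takes regexes, the unescaped dots and \\w\\w match loosely, but on real gallery URLs the effect is the intended one. A standalone sketch of the same normalization (the gallery URL is hypothetical):

    import java.net.MalformedURLException;
    import java.net.URL;

    class XhamsterSanitizeSketch {
        public static void main(String[] args) throws MalformedURLException {
            String u = "https://de.xhamster.com/photos/gallery/example-123";
            u = u.replaceAll("m.xhamster.com", "xhamster.com");      // mobile -> canonical
            u = u.replaceAll("\\w\\w.xhamster.com", "xhamster.com"); // country mirror -> canonical
            URL sanitized = new URL(u.replaceAll("xhamster.com", "m.xhamster.com"));
            System.out.println(sanitized); // https://m.xhamster.com/photos/gallery/example-123
        }
    }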
-        String nextURL = this.url.toExternalForm();
-        while (nextURL != null) {
-            logger.info(" Retrieving " + nextURL);
-            Document doc = Http.url(nextURL).get();
-            for (Element thumb : doc.select("table.iListing div.img img")) {
-                if (!thumb.hasAttr("src")) {
-                    continue;
-                }
-                String image = thumb.attr("src");
-                // replace thumbnail urls with the urls to the full sized images
-                image = image.replaceAll(
-                        "https://upt.xhcdn\\.",
-                        "http://up.xhamster.");
-                image = image.replaceAll("ept\\.xhcdn", "ep.xhamster");
-                image = image.replaceAll(
-                        "_160\\.",
-                        "_1000.");
-                // Xhamster has shitty cert management and uses the wrong cert for their ep.xhamster Domain
-                // so we change all https requests to http
-                image = image.replaceAll(
-                        "https://",
-                        "http://");
-                index += 1;
-                String prefix = "";
-                if (Utils.getConfigBoolean("download.save_order", true)) {
-                    prefix = String.format("%03d_", index);
-                }
-                addURLToDownload(new URL(image), prefix);
-                if (isThisATest()) {
-                    break;
-                }
-            }
-            if (isThisATest()) {
-                break;
-            }
-            nextURL = null;
-            for (Element element : doc.select("a.last")) {
-                nextURL = element.attr("href");
-                break;
-            }
-        }
-        waitForThreads();
-    }
-
-    @Override
-    public String getHost() {
-        return HOST;
     }

     @Override
     public String getGID(URL url) throws MalformedURLException {
-        Pattern p = Pattern.compile("^https?://[wmde.]*xhamster\\.com/photos/gallery/.*?(\\d{1,})$");
+        Pattern p = Pattern.compile("^https?://[\\w\\w.]*xhamster\\.com/photos/gallery/.*?(\\d+)$");
         Matcher m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             return m.group(1);
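Worth flagging in the getGID change above: inside a character class, [\\w\\w.] simply means "any word character or dot" (the repeated \\w is redundant), so the new pattern accepts any such prefix rather than the old [wmde.] whitelist; (\\d+) is just the idiomatic spelling of (\\d{1,}). A quick check (the URLs are hypothetical):

    import java.util.regex.Pattern;

    class XhamsterGidSketch {
        public static void main(String[] args) {
            Pattern p = Pattern.compile("^https?://[\\w\\w.]*xhamster\\.com/photos/gallery/.*?(\\d+)$");
            // The prefix class now admits arbitrary word-char/dot subdomains:
            System.out.println(p.matcher("https://de.xhamster.com/photos/gallery/example-123").matches());  // true
            System.out.println(p.matcher("https://foo.xhamster.com/photos/gallery/example-123").matches()); // true
        }
    }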
@@ -98,4 +53,54 @@ public class XhamsterRipper extends AlbumRipper {
             + " Got: " + url);
     }

-}
+    @Override
+    public Document getFirstPage() throws IOException {
+        // "url" is an instance field of the superclass
+        return Http.url(url).get();
+    }
+
+    @Override
+    public boolean canRip(URL url) {
+        Pattern p = Pattern.compile("^https?://[wmde.]*xhamster\\.com/photos/gallery/.*?(\\d+)$");
+        Matcher m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return true;
+        }
+        return false;
+    }
+
+    @Override
+    public Document getNextPage(Document doc) throws IOException {
+        if (doc.select("a.next").first().attr("href") != "") {
+            return Http.url(doc.select("a.next").first().attr("href")).get();
+        } else {
+            throw new IOException("No more pages");
+        }
+    }
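One caution about the new getNextPage above: attr("href") != "" compares object references, not string contents, so it does not reliably detect an empty href; and first() returns null when no a.next element exists, which would surface as a NullPointerException rather than the intended IOException. A self-contained jsoup sketch of a null-safe, content-based check (a suggested hardening, not what the commit ships):

    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;

    class NextPageCheckSketch {
        public static void main(String[] args) {
            Document doc = Jsoup.parse("<a class=\"other\" href=\"p2\">next</a>"); // no a.next present
            Element next = doc.select("a.next").first();
            // Null-check first() and compare string contents, not references:
            if (next != null && !next.attr("href").isEmpty()) {
                System.out.println("follow " + next.attr("href"));
            } else {
                System.out.println("No more pages"); // taken here, instead of an NPE
            }
        }
    }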
+
+    @Override
+    public List<String> getURLsFromPage(Document doc) {
+        List<String> result = new ArrayList<>();
+        for (Element thumb : doc.select("div.picture_view > div.pictures_block > div.items > div.item-container > a > div.thumb_container > div.img > img")) {
+            String image = thumb.attr("src");
+            // replace thumbnail urls with the urls to the full sized images
+            image = image.replaceAll(
+                    "https://upt.xhcdn\\.",
+                    "http://up.xhamster.");
+            image = image.replaceAll("ept\\.xhcdn", "ep.xhamster");
+            image = image.replaceAll(
+                    "_160\\.",
+                    "_1000.");
+            // Xhamster has bad cert management and uses invalid certs for some cdns, so we change all our requests to http
+            image = image.replaceAll("https", "http");
+            result.add(image);
+        }
+        return result;
+    }
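Note on the thumbnail rewriting above: String.replaceAll treats its first argument as a regex and replaces every occurrence, so replaceAll("https", "http") downgrades the substring wherever it appears. That is fine while "https" only occurs in the scheme, but anchoring is the cautious spelling. A small check (the image URL is hypothetical):

    class SchemeDowngradeSketch {
        public static void main(String[] args) {
            String image = "https://ep.xhamster.com/000/123_1000.jpg";
            // Unanchored: rewrites the substring anywhere in the string.
            System.out.println(image.replaceAll("https", "http"));
            // Anchored alternative that only touches the scheme:
            System.out.println(image.replaceAll("^https://", "http://"));
        }
    }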
+
+    @Override
+    public void downloadURL(URL url, int index) {
+        addURLToDownload(url, getPrefix(index));
+    }
+}
src/main/java/com/rarchives/ripme/ripper/rippers/ZizkiRipper.java
@@ -20,7 +20,7 @@ import com.rarchives.ripme.utils.Http;
 public class ZizkiRipper extends AbstractHTMLRipper {

     private Document albumDoc = null;
-    private Map<String,String> cookies = new HashMap<String,String>();
+    private Map<String,String> cookies = new HashMap<>();

     public ZizkiRipper(URL url) throws IOException {
         super(url);
@@ -76,7 +76,7 @@ public class ZizkiRipper extends AbstractHTMLRipper {

     @Override
     public List<String> getURLsFromPage(Document page) {
-        List<String> imageURLs = new ArrayList<String>();
+        List<String> imageURLs = new ArrayList<>();
         // Page contains images
         logger.info("Look for images.");
         for (Element thumb : page.select("img")) {
@@ -99,7 +99,6 @@ public class ZizkiRipper extends AbstractHTMLRipper {
             src = thumb.attr("src");
             logger.debug("Found url with " + src);
             if (!src.contains("zizki.com")) {
-                continue;
             } else {
                 imageURLs.add(src.replace("/styles/medium/public/","/styles/large/public/"));
             }
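On the removed continue above: the loop body ends right after this if/else, so the continue was dead weight, but its removal leaves an empty then-branch. Inverting the test reads more naturally (a suggested refactor, not part of the commit):

    import java.util.ArrayList;
    import java.util.List;

    class ZizkiBranchSketch {
        public static void main(String[] args) {
            List<String> imageURLs = new ArrayList<>();
            String src = "https://www.zizki.com/styles/medium/public/example.jpg"; // hypothetical
            if (src.contains("zizki.com")) { // no empty branch needed
                imageURLs.add(src.replace("/styles/medium/public/", "/styles/large/public/"));
            }
            System.out.println(imageURLs);
        }
    }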