Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- @Override
- public void onCommand(final String message, final String... args) {
- final StringTools stringTools = getClient().getStringTools();
- try {
- if (stringTools.isBoolean(args[1])) {
- String url = args[0];
- boolean images = Boolean.parseBoolean(args[1]);
- Document doc = Jsoup.connect(url).get();
- Elements rawTitle = doc.select("#firstHeading");
- String[] title = rawTitle.text().split(":");
- getClient().getLogs().addLog("Images: " + images);
- getClient().getLogs().addLog("Title: " + title[0]);
- getClient().getLogs().addLog("Volume: " + title[1]);
- File directory = createMainDirectory();
- File bookDirectory = createBookDirectory(directory, title[0]
- + " - " + title[1]);
- File tempDirectory = createBookDirectory(bookDirectory, "Temp");
- File imageDirectory = createBookDirectory(tempDirectory,
- "Images");
- File htmlDirectory = createBookDirectory(tempDirectory, "Text");
- createBookDirectory(tempDirectory, "Style");
- createBookDirectory(tempDirectory, "Fonts");
- createBookDirectory(tempDirectory, "Audio");
- createBookDirectory(tempDirectory, "Video");
- createBookDirectory(tempDirectory, "Misc");
- Elements rawChapters = doc.select(".mw-headline");
- Iterator<Element> chaptersIterator = rawChapters.iterator();
- while (chaptersIterator.hasNext()) {
- Element chapter = chaptersIterator.next();
- addChapter(new ChapterObject(chapter.html(),
- chapter.text(), getClient()), true);
- }
- String rawHtml = replacer(doc.outerHtml());
- String[] html = rawHtml.split("\n");
- List<String> tempContentList = new LinkedList<String>();
- for (int index = 0; index < html.length; index++) {
- String content = html[index];
- if (content.contains("<span class=\"mw-headline\"")
- && !content.contains("Translator")) {
- if (!tempContentList.contains(content)) tempContentList
- .add(content);
- for (int chapterIndex = 0; chapterIndex < tempContentList
- .size(); chapterIndex++) {
- ChapterObject chapter = getLoadedChapters().get(
- chapterIndex);
- if (chapter.getIndex() == -1) chapter.setIndex(
- index, true);
- }
- }
- }
- for (int indexContent = 0; indexContent < getLoadedChapters()
- .size() - 1; indexContent++) {
- ChapterObject chapter = getLoadedChapters().get(
- indexContent);
- if (indexContent != getLoadedChapters().size()) {
- ChapterObject secondChapter = getLoadedChapters().get(
- indexContent + 1);
- int start = 0;
- int end = secondChapter.getIndex() - 1;
- for (start = chapter.getIndex() + 1; start < end; start++) {
- String chapterContent = html[start];
- addChapterContent(chapter,
- chapterContent.replaceAll("^\\s+", ""),
- false, images, imageDirectory, title[0]);
- }
- }
- if (indexContent + 2 == getLoadedChapters().size()) {
- ChapterObject chapterLast = getLoadedChapters().get(
- indexContent + 1);
- int start = 0;
- int end = html.length;
- for (start = chapterLast.getIndex() + 1; start < end; start++) {
- String chapterContent = html[start];
- addChapterContent(chapterLast,
- chapterContent.replaceAll("^\\s+", ""),
- false, images, imageDirectory, title[0]);
- }
- }
- }
- getClient()
- .getLogs()
- .addLog("Finished scraping content, starting to download images & generate HTML files.");
- for (ChapterObject chapter : getLoadedChapters()) {
- try {
- for (int imageIndex = 0; imageIndex < chapter
- .getImages().size(); imageIndex++) {
- String imageUrl = chapter.getImages().get(
- imageIndex);
- download(imageUrl, imageDirectory,
- title[0].replace(" ", "-") + "-"
- + imageIndex + ".jpg", chapter);
- }
- File htmlFile = new File(htmlDirectory,
- chapter.getName() + ".html");
- FileOutputStream mfos = new FileOutputStream(htmlFile);
- Writer out = new OutputStreamWriter(mfos, "UTF-8");
- out.write("<body>");
- // out.write("\r\n");
- out.write("<h1 id=\"Title\">");
- out.write(chapter.getName());
- out.write("</h1>");
- // out.write("\r\n");
- for (String content : chapter.getContent()) {
- byte[] outByte = UnicodeUtils.convert(
- content.getBytes(), "UTF-8");
- String output = new String(outByte);
- output = output.substring(output.indexOf("<"));
- out.write(output);
- }
- out.write("</body>");
- // out.write("\r\n");
- out.flush();
- out.close();
- } catch (Exception exception) {
- exception.printStackTrace();
- }
- getClient().getLogs().addLog(
- "HTML File has been succesfully created for '"
- + chapter.getName() + "'");
- }
- saveOrder("", tempDirectory);
- System.out.println(tempDirectory.getAbsolutePath());
- getClient().getLogs().addLog("All done!"); // fix this shit
- // oliver, please
- }
- } catch (Exception exception) {
- exception.printStackTrace();
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement