SHOW:
|
|
- or go back to the newest paste.
1 | package task; | |
2 | ||
3 | import java.io.File; | |
4 | ||
/**
 * A single acceptance test applied to a candidate file.
 */
public interface Checker {

    /**
     * Decides whether the given file passes this check.
     *
     * @param file the file to examine
     * @return {@code true} if the file passes, {@code false} otherwise
     */
    boolean check(File file);
}
8 | ------------ | |
9 | import java.io.BufferedReader; | |
10 | import java.io.File; | |
11 | - | import java.nio.file.Paths; |
11 | + | |
12 | import java.io.FileNotFoundException; | |
13 | import java.io.IOException; | |
14 | import java.io.InputStreamReader; | |
15 | import java.io.PrintStream; | |
16 | import java.nio.file.FileSystems; | |
17 | import java.nio.file.Files; | |
18 | import java.nio.file.Path; | |
19 | import java.util.ArrayList; | |
20 | import java.util.Arrays; | |
21 | import java.util.Collections; | |
22 | import java.util.HashSet; | |
23 | - | import net.sf.jmimemagic.Magic; |
23 | + | |
24 | - | import net.sf.jmimemagic.MagicException; |
24 | + | |
25 | - | import net.sf.jmimemagic.MagicMatch; |
25 | + | |
26 | - | import net.sf.jmimemagic.MagicMatchNotFoundException; |
26 | + | |
27 | - | import net.sf.jmimemagic.MagicParseException; |
27 | + | |
28 | import java.util.concurrent.Executor; | |
29 | import java.util.concurrent.Executors; | |
30 | import java.util.concurrent.LinkedBlockingQueue; | |
31 | ||
32 | import com.ibm.icu.text.CharsetDetector; | |
33 | ||
34 | public class FileScan implements Runnable { | |
35 | private String whatFind; | |
36 | private BlockingQueue<File> queue; | |
37 | private CountDownLatch latch; | |
38 | private File endOfWorkFile; | |
39 | ||
40 | public FileScan(String whatFind, BlockingQueue<File> queue, | |
41 | CountDownLatch latch, File endOfWorkFile) { | |
42 | this.whatFind = whatFind; | |
43 | this.queue = queue; | |
44 | this.latch = latch; | |
45 | this.endOfWorkFile = endOfWorkFile; | |
46 | } | |
47 | ||
48 | public FileScan() { | |
49 | } | |
50 | ||
51 | @Override | |
52 | public void run() { | |
53 | ||
54 | while (true) { | |
55 | try { | |
56 | File file; | |
57 | file = queue.take(); | |
58 | ||
59 | if (file == endOfWorkFile) { | |
60 | break; | |
61 | } | |
62 | ||
63 | scan(file); | |
64 | } catch (InterruptedException e) { | |
65 | e.printStackTrace(); | |
66 | } | |
67 | } | |
68 | ||
69 | latch.countDown(); | |
70 | } | |
71 | ||
72 | private void scan(File file) { | |
73 | Scanner scanner = null; | |
74 | int matches = 0; | |
75 | ||
76 | try { | |
77 | scanner = new Scanner(file); | |
78 | } catch (FileNotFoundException e) { | |
79 | System.out.println("File Not Found."); | |
80 | e.printStackTrace(); | |
81 | } | |
82 | ||
83 | while (scanner.hasNext()) | |
84 | if (scanner.next().equals(whatFind)) { | |
85 | matches++; | |
86 | } | |
87 | - | System.out.println(myStr); |
87 | + | |
88 | if (matches > 0) { | |
89 | String myStr = String.format( | |
90 | "File: %s - and the number of matches " + "is: %d", | |
91 | - | public void askUserPathAndWord() { |
91 | + | |
92 | PrintStream ps = new PrintStream(System.out, true); | |
93 | ps.println(myStr); | |
94 | } | |
95 | } | |
96 | ||
97 | private void askUserPathAndWord() { | |
98 | try { | |
99 | tryToAskUserPathAndWord(); | |
100 | } catch (IOException | RuntimeException e) { | |
101 | System.out.println("Wrong input!"); | |
102 | e.printStackTrace(); | |
103 | } catch (InterruptedException e) { | |
104 | System.out.println("Interrupted."); | |
105 | e.printStackTrace(); | |
106 | } | |
107 | } | |
108 | ||
109 | private void tryToAskUserPathAndWord() throws IOException, | |
110 | InterruptedException { | |
111 | PathAndWord pathAndWord = readPathAndWord(); | |
112 | ||
113 | if (pathAndWord.isProperlyInitialized()) { | |
114 | performScan(pathAndWord, "GameOver.tmp"); | |
115 | System.out.println("Thank you!"); | |
116 | } else { | |
117 | System.out.println("You did not enter anything"); | |
118 | } | |
119 | } | |
120 | ||
121 | private PathAndWord readPathAndWord() throws IOException { | |
122 | System.out | |
123 | .println("Please, enter a Path and Word (which you want to find):"); | |
124 | ||
125 | BufferedReader bufferedReader = new BufferedReader( | |
126 | new InputStreamReader(System.in)); | |
127 | ||
128 | String path = readPath(bufferedReader); | |
129 | String whatFind = readWord(bufferedReader); | |
130 | return new PathAndWord(path, whatFind); | |
131 | } | |
132 | ||
133 | private String readPath(BufferedReader bufferedReader) throws IOException { | |
134 | boolean ok = false; | |
135 | File pathInput; | |
136 | ||
137 | do { | |
138 | System.out.println("Please enter a Path:"); | |
139 | pathInput = new File(bufferedReader.readLine()); | |
140 | if (pathInput.exists() && pathInput.isDirectory()) { | |
141 | ok = true; | |
142 | } else { | |
143 | System.out.println("Doesn't exist or isn't folder!"); | |
144 | } | |
145 | } while (!ok); | |
146 | ||
147 | return pathInput.getAbsolutePath(); | |
148 | } | |
149 | ||
150 | private String readWord(BufferedReader bufferedReader) throws IOException { | |
151 | System.out.println("Please enter a Word:"); | |
152 | return bufferedReader.readLine(); | |
153 | } | |
154 | ||
155 | private void performScan(PathAndWord pathAndWord, String endOfWorkFileName) | |
156 | throws InterruptedException { | |
157 | BlockingQueue<File> queue = new LinkedBlockingQueue<File>(); | |
158 | ||
159 | File endOfWorkFile = new File(endOfWorkFileName); | |
160 | CountDownLatch latch = new CountDownLatch(2); | |
161 | ||
162 | FolderScan folderScan = new FolderScan(pathAndWord.path, queue, latch, | |
163 | endOfWorkFile); | |
164 | FileScan fileScan = new FileScan(pathAndWord.whatFind, queue, latch, | |
165 | endOfWorkFile); | |
166 | ||
167 | Executor executor = Executors.newCachedThreadPool(); | |
168 | executor.execute(folderScan); | |
169 | executor.execute(fileScan); | |
170 | ||
171 | latch.await(); | |
172 | } | |
173 | ||
174 | /** | |
175 | * @param args | |
176 | */ | |
177 | ||
178 | public static void main(String[] args) { | |
179 | long startTime = System.currentTimeMillis(); | |
180 | ||
181 | new FileScan().askUserPathAndWord(); | |
182 | ||
183 | long stopTime = System.currentTimeMillis(); | |
184 | long elapsedTime = stopTime - startTime; | |
185 | System.out.println("\nRuntime time " + elapsedTime + " milliseconds."); | |
186 | } | |
187 | } | |
188 | ||
189 | class FolderScan implements Runnable { | |
190 | ||
191 | FolderScan(String path, BlockingQueue<File> queue, CountDownLatch latch, | |
192 | File endOfWorkFile) { | |
193 | - | new ExtentionCheckers(), /*new ProbeContentTypeCheker(),*/ |
193 | + | |
194 | - | new EncodingsCheck() )); |
194 | + | |
195 | this.latch = latch; | |
196 | this.endOfWorkFile = endOfWorkFile; | |
197 | - | public FolderScan() { |
197 | + | |
198 | checkers = new ArrayList<Checker>(Arrays.asList( | |
199 | new ExtentionChecking(), new ProbeContentTypeCheking(), | |
200 | new EncodingChecking())); | |
201 | ||
202 | - | findFiles(path); |
202 | + | List<UncriticalChecker> checkers = Collections |
203 | - | queue.add(endOfWorkFile); |
203 | + | .singletonList(new UncriticalChecker()); |
204 | FolderScan folderScan = new FolderScan(path, queue, latch, | |
205 | endOfWorkFile, checkers); | |
206 | } | |
207 | ||
208 | public FolderScan(String path2, BlockingQueue<File> queue2, | |
209 | CountDownLatch latch2, File endOfWorkFile2, | |
210 | List<UncriticalChecker> checkers2) { | |
211 | } | |
212 | ||
213 | - | boolean checksFailed = true; |
213 | + | |
214 | public void run() { | |
215 | try { | |
216 | findFiles(path); | |
217 | - | // if (currentFile.getAbsolutePath().equals( |
217 | + | queue.put(endOfWorkFile); |
218 | - | // "E:\\Document\\!Nazar\\BOOKS\\)\\PDF\\readme.txt")) { |
218 | + | latch.countDown(); |
219 | - | // System.out.println(); |
219 | + | |
220 | - | // } |
220 | + | |
221 | } | |
222 | } | |
223 | - | checksFailed = false; |
223 | + | |
224 | private void findFiles(String path) { | |
225 | ||
226 | try { | |
227 | File root = new File(path); | |
228 | - | if (checksFailed) { |
228 | + | |
229 | for (File currentFile : list) { | |
230 | boolean checksPassed = true; | |
231 | if (currentFile.isDirectory()) { | |
232 | findFiles(currentFile.getAbsolutePath()); | |
233 | } else { | |
234 | for (Checker currentChecker : checkers) { | |
235 | if (!currentChecker.check(currentFile)) { | |
236 | checksPassed = false; | |
237 | break; | |
238 | } | |
239 | } | |
240 | ||
241 | if (checksPassed) { | |
242 | queue.put(currentFile); | |
243 | } | |
244 | } | |
245 | } | |
246 | } catch (InterruptedException e) { | |
247 | e.printStackTrace(); | |
248 | } | |
249 | ||
250 | } | |
251 | ||
252 | private String path; | |
253 | private BlockingQueue<File> queue; | |
254 | private CountDownLatch latch; | |
255 | private File endOfWorkFile; | |
256 | private List<Checker> checkers; | |
257 | private List<UncriticalChecker> checkList; | |
258 | } | |
259 | ||
260 | - | class ExtentionCheckers implements Checker { |
260 | + | class UncriticalChecker implements Checker { |
261 | @Override | |
262 | public boolean check(File currentFile) { | |
263 | return true; | |
264 | } | |
265 | } | |
266 | ||
/**
 * Immutable pair of the directory to search and the word to look for.
 */
class PathAndWord {
    /** Directory to traverse; may be {@code null} if nothing was entered. */
    final String path;
    /** Word to search for; may be {@code null} if nothing was entered. */
    final String whatFind;

    /**
     * @param path     directory to traverse
     * @param whatFind word to search for
     */
    PathAndWord(String path, String whatFind) {
        this.path = path;
        this.whatFind = whatFind;
    }

    /**
     * @return {@code true} if both the path and the word are non-null
     */
    boolean isProperlyInitialized() {
        return path != null && whatFind != null;
    }
}
280 | ||
281 | class ExtentionChecking implements Checker { | |
282 | - | return isUTF8(currentFile); |
282 | + | |
283 | @Override | |
284 | public boolean check(File currentFile) { | |
285 | - | public static boolean isUTF8(File file) { |
285 | + | |
286 | Set<String> extensions = new HashSet<String>(Arrays.asList(".txt", | |
287 | ".pdf", ".doc", ".docx", ".html", ".htm", ".xml", ".djvu", | |
288 | ".djv", ".rar", ".rtf", ".tmp")); | |
289 | ||
290 | if (extensions.contains(fileName.substring(fileName.lastIndexOf(".")))) { | |
291 | return true; | |
292 | } | |
293 | ||
294 | return false; | |
295 | } | |
296 | ||
297 | private String fileName; | |
298 | } | |
299 | ||
300 | class EncodingChecking implements Checker { | |
301 | ||
302 | @Override | |
303 | public boolean check(File currentFile) { | |
304 | - | if (0 == (buffer[0] & 0x80)) { |
304 | + | return detectEncoding(currentFile); |
305 | - | return true; // ASCII subset character, fast path |
305 | + | |
306 | - | } else if (0xF0 == (buffer[0] & 0xF8)) { // start of 4-byte sequence |
306 | + | |
307 | - | if (buffer[3] >= buffer.length) { |
307 | + | public static boolean detectEncoding(File file) { |
308 | - | return false; |
308 | + | detector = new CharsetDetector(); |
309 | ||
310 | - | if ((0x80 == (buffer[1] & 0xC0)) && (0x80 == (buffer[2] & 0xC0)) |
310 | + | |
311 | - | && (0x80 == (buffer[3] & 0xC0))) |
311 | + | |
312 | throw new IllegalArgumentException("input file can't be null"); | |
313 | - | } else if (0xE0 == (buffer[0] & 0xF0)) { // start of 3-byte sequence |
313 | + | |
314 | - | if (buffer[2] >= buffer.length) { |
314 | + | |
315 | - | return false; |
315 | + | |
316 | "input file refers to a directory"); | |
317 | - | if ((0x80 == (buffer[1] & 0xC0)) && (0x80 == (buffer[2] & 0xC0))) { |
317 | + | |
318 | ||
319 | // read input file | |
320 | - | } else if (0xC0 == (buffer[0] & 0xE0)) { // start of 2-byte sequence |
320 | + | |
321 | - | if (buffer[1] >= buffer.length) { |
321 | + | |
322 | - | return false; |
322 | + | |
323 | } catch (IOException e) { | |
324 | - | if (0x80 == (buffer[1] & 0xC0)) { |
324 | + | |
325 | "Can't read input file, error = " + e.getLocalizedMessage()); | |
326 | } | |
327 | ||
328 | if (detector.setText(buffer) != null) { | |
329 | return true; | |
330 | } | |
331 | ||
332 | return false; | |
333 | } | |
334 | ||
335 | - | try{ |
335 | + | |
336 | - | byte firstBytes[] = new byte[4]; |
336 | + | |
337 | - | int count = fileInputStream.read(firstBytes); |
337 | + | |
338 | - | if(count < 4){ |
338 | + | |
339 | - | throw new IOException("Empty file"); |
339 | + | byte firstBytes[] = new byte[50]; |
340 | - | } |
340 | + | int count = fileInputStream.read(firstBytes); |
341 | - | return firstBytes; |
341 | + | if (count < 5) { |
342 | - | } finally { |
342 | + | throw new IOException("Poor file!"); |
343 | - | fileInputStream.close(); |
343 | + | |
344 | - | } |
344 | + | return firstBytes; |
345 | } finally { | |
346 | fileInputStream.close(); | |
347 | } | |
348 | - | class ProbeContentTypeCheker implements Checker { |
348 | + | |
349 | ||
350 | private static CharsetDetector detector; | |
351 | } | |
352 | - | String mimeType = null; |
352 | + | |
353 | class ProbeContentTypeCheking implements Checker { | |
354 | - | Path path = Paths.get(currentFile.getAbsolutePath()); |
354 | + | |
355 | - | byte[] data = Files.readAllBytes(path); |
355 | + | |
356 | - | MagicMatch match = Magic.getMagicMatch(data); |
356 | + | |
357 | - | mimeType = match.getMimeType(); |
357 | + | |
358 | - | } catch (MagicParseException | MagicMatchNotFoundException |
358 | + | Path filePath = FileSystems.getDefault().getPath( |
359 | - | | MagicException | IOException e) { |
359 | + | currentFile.getAbsolutePath()); |
360 | if ((null != Files.probeContentType(filePath))) { | |
361 | return true; | |
362 | } | |
363 | - | if (null != mimeType) { |
363 | + | |
364 | e.printStackTrace(); | |
365 | } | |
366 | ||
367 | return false; | |
368 | } | |
369 | } |