Advertisement
mullerdaniil

lab15

Nov 16th, 2020
93
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 12.53 KB | None | 0 0
  1. #include <stdio.h>
  2. #include <string.h>
  3. #include <stdlib.h>
  4.  
  5.  
  6. typedef struct Node {
  7. struct Node *left, *right;
  8. unsigned char byte;
  9. int frequency;
  10. } Node;
  11.  
  12.  
  13. void treeTraversal(Node *node, unsigned char codeMap[], char *codes, int *lastIndex, char currentString[]) {
  14. if (node->left == NULL && node->right == NULL) {
  15.  
  16. codeMap[*lastIndex] = node->byte;
  17. char *stringPointer = codes + *lastIndex * 400;
  18. char *currentStringPointer = currentString;
  19.  
  20. int stringLength = strlen(currentString);
  21. for (int i = 0; i < stringLength; i++) {
  22. (*stringPointer) = (*currentStringPointer);
  23. stringPointer++;
  24. currentStringPointer++;
  25. }
  26.  
  27. (*stringPointer) = '\0';
  28.  
  29. (*lastIndex)++;
  30. return;
  31. }
  32.  
  33. char currentStringLeft[400];
  34. char currentStringRight[400];
  35. strcpy(currentStringLeft, currentString);
  36. strcpy(currentStringRight, currentString);
  37. strcat(currentStringLeft, "0");
  38. strcat(currentStringRight, "1");
  39.  
  40. treeTraversal(node->left, codeMap, codes, lastIndex, currentStringLeft);
  41. treeTraversal(node->right, codeMap, codes, lastIndex, currentStringRight);
  42. }
  43.  
  44. void addStringToBuffer(unsigned char buffer[], int *bufferPivot, char string[]) {
  45. char *stringPivot = string;
  46. int stringLength = strlen(string);
  47.  
  48.  
  49. for (int i = 0; i < stringLength; i++) {
  50. if (string[i] == '0') {
  51. buffer[*bufferPivot / 8] &= ~(1 << (7 - (*bufferPivot % 8)));
  52. } else {
  53. buffer[*bufferPivot / 8] |= (1 << (7 - (*bufferPivot % 8)));
  54. }
  55. (*bufferPivot)++;
  56. stringPivot++;
  57. }
  58. }
  59.  
  60.  
  61. void printError(char *errorMessage) {
  62. printf("Error: %s!", errorMessage);
  63. exit(1);
  64. }
  65.  
  66. char getBit(unsigned char byte, char i) {
  67. return '0' + ((byte >> (7 - i)) & 1);
  68. }
  69.  
  70.  
  71. void create(char *archiveName, int argc, char **argv) {
  72. FILE *currentFile;
  73.  
  74. // check for files
  75. for (int fileIndex = 0; fileIndex < argc - 4; fileIndex++) {
  76. currentFile = NULL;
  77. currentFile = fopen(argv[fileIndex + 4], "rb");
  78. if (currentFile == NULL) {
  79. printf("File %s not found!\n",argv[fileIndex + 4]);
  80. printError("FILE NOT FOUND");
  81. }
  82. fclose(currentFile);
  83. }
  84.  
  85.  
  86. int fileSizes[argc - 4];
  87.  
  88. unsigned char currentByte;
  89. int c;
  90. int nodeFound;
  91.  
  92. // initialize the array of byte frequencies and fill it with zeros
  93. short nodesNumber = 0;
  94. Node * nodes[512];
  95. int active[512];
  96. for (int i = 0; i < 512; i++) active[i] = 0;
  97.  
  98. // reading every file
  99. for (int currentFileIndex = 0; currentFileIndex < argc - 4; currentFileIndex++) {
  100.  
  101. // opening the file
  102. currentFile = NULL;
  103. currentFile = fopen(argv[currentFileIndex + 4], "rb");
  104.  
  105. fileSizes[currentFileIndex] = 0;
  106.  
  107. // creating the initial set of nodes
  108. while ((c = getc(currentFile)) != EOF) {
  109.  
  110. fileSizes[currentFileIndex]++;
  111. currentByte = c;
  112. nodeFound = 0;
  113.  
  114. for (int i = 0; i < nodesNumber; i++) {
  115. if (nodes[i]->byte == currentByte) {
  116. nodes[i]->frequency++;
  117. nodeFound = 1;
  118. break;
  119. }
  120. }
  121.  
  122. if (!nodeFound) {
  123. nodes[nodesNumber] = malloc(sizeof(Node));
  124. active[nodesNumber] = 1;
  125. nodes[nodesNumber]->byte = currentByte;
  126. nodes[nodesNumber]->frequency = 1;
  127. nodes[nodesNumber]->left = NULL;
  128. nodes[nodesNumber]->right = NULL;
  129. nodesNumber++;
  130. }
  131. }
  132.  
  133. fclose(currentFile);
  134. }
  135.  
  136.  
  137. // BUILDING HUFFMAN CODE
  138.  
  139. int lastIndex = nodesNumber;
  140. Node *firstMin, *secondMin;
  141. int firstMinValue, secondMinValue;
  142. int firstMinIndex, secondMinIndex;
  143.  
  144. for (int iter = 0; iter < nodesNumber - 1; iter++) {
  145. firstMinValue = INT_MAX;
  146. secondMinValue = INT_MAX;
  147.  
  148. // find the 1st min
  149. for (int node = 0; node < lastIndex; node++) {
  150. if (active[node] && nodes[node] != NULL && nodes[node]->frequency < firstMinValue) {
  151. firstMinValue = nodes[node]->frequency;
  152. firstMin = nodes[node];
  153. firstMinIndex = node;
  154. }
  155. }
  156.  
  157. // find the 2nd min
  158. for (int node = 0; node < lastIndex; node++) {
  159. if (active[node] && nodes[node] != NULL && nodes[node]->frequency < secondMinValue && firstMinIndex != node) {
  160. secondMinValue = nodes[node]->frequency;
  161. secondMin = nodes[node];
  162. secondMinIndex = node;
  163. }
  164. }
  165.  
  166. // create a new node and add it to the list of nodes
  167. active[firstMinIndex] = 0;
  168. active[secondMinIndex] = 0;
  169. nodes[lastIndex] = malloc(sizeof(Node));
  170. nodes[lastIndex]->left = firstMin;
  171. nodes[lastIndex]->right = secondMin;
  172. nodes[lastIndex]->frequency = nodes[firstMinIndex]->frequency + nodes[secondMinIndex]->frequency;
  173. active[lastIndex] = 1;
  174. lastIndex++;
  175.  
  176. }
  177.  
  178. // find the root of the heap
  179. Node *root;
  180. for (int i = 0; i < lastIndex; i++) {
  181. if (active[i]) {
  182. root = nodes[i];
  183. break;
  184. }
  185. }
  186.  
  187. int initialSize = 0;
  188. for (int i = 0; i < argc - 4; i++) initialSize += fileSizes[i];
  189.  
  190. // tree traversal
  191. unsigned char codeMap[nodesNumber];
  192. int codeMapLastIndex = 0;
  193. char codeMapCodes[nodesNumber][400];
  194. char **somePointer = codeMapCodes;
  195.  
  196. treeTraversal(root, codeMap, codeMapCodes, &codeMapLastIndex, "");
  197. char encodingTable[256][400] = {0};
  198. for (int i = 0; i < codeMapLastIndex; i++) strcpy(encodingTable[codeMap[i]], codeMapCodes[i]);
  199.  
  200. // CREATING ARCHIVE
  201.  
  202. FILE *archiveFile = fopen(archiveName, "wb");
  203.  
  204. // archive name size
  205. short archiveFileNameSize = strlen(archiveName) + 1;
  206. fwrite(&archiveFileNameSize, 1, 2, archiveFile);
  207.  
  208. // archive name
  209. fwrite(archiveName, 1, archiveFileNameSize, archiveFile);
  210.  
  211. // number of files
  212. short numberOfFiles = argc - 4;
  213. fwrite(&numberOfFiles, sizeof(short), 1, archiveFile);
  214.  
  215. // file name size, file name, file size
  216. short currentFileNameSize;
  217. for (int i = 0; i < numberOfFiles; i++) {
  218. currentFileNameSize = strlen(argv[i + 4]) + 1;
  219. fwrite(&currentFileNameSize, sizeof(short), 1, archiveFile);
  220. fwrite(argv[i + 4], 1, currentFileNameSize, archiveFile);
  221. fwrite(&fileSizes[i], 1, sizeof(int), archiveFile);
  222. }
  223.  
  224. // codeMap size
  225. fwrite(&nodesNumber, 1, 2, archiveFile);
  226.  
  227. // codeMap: ascii code (1 byte) - size of code (2 byte) - code (? bytes)
  228. short currentCodeSize;
  229. for (int i = 0; i < codeMapLastIndex; i++) {
  230. fwrite(&codeMap[i], 1, 1, archiveFile);
  231. currentCodeSize = strlen(codeMapCodes[i]) + 1;
  232. fwrite(&currentCodeSize, 1, 2, archiveFile);
  233. fwrite(codeMapCodes[i], 1, currentCodeSize, archiveFile);
  234. }
  235.  
  236. // content
  237. unsigned char byteBuffer[20];
  238. int byteBufferPivot = 0;
  239.  
  240.  
  241. for (int currentFileIndex = 0; currentFileIndex < argc - 4; currentFileIndex++) {
  242. FILE *currentFile = fopen(argv[currentFileIndex + 4], "rb");
  243. while ((c = getc(currentFile)) != EOF) {
  244. currentByte = c;
  245.  
  246. addStringToBuffer(byteBuffer, &byteBufferPivot, encodingTable[currentByte]);
  247.  
  248. if (byteBufferPivot > 7) {
  249. for (int i = 0; i < byteBufferPivot / 8; i++) {
  250. putc(byteBuffer[i], archiveFile);
  251. }
  252. byteBuffer[0] = byteBuffer[byteBufferPivot / 8];
  253. byteBufferPivot %= 8;
  254. }
  255. }
  256.  
  257. fclose(currentFile);
  258.  
  259. }
  260.  
  261. putc(byteBuffer[0], archiveFile);
  262. char usefulBitsNumber = byteBufferPivot;
  263. putc(usefulBitsNumber, archiveFile);
  264.  
  265.  
  266. printf("Initial size:\t%d bytes \n", initialSize);
  267. printf("Final size:\t%d bytes \n", ftell(archiveFile));
  268. printf("Compression:\t%.2f%% \n", ftell(archiveFile) * 100 / (double) initialSize);
  269.  
  270. /*
  271. >>> ARCHIVE STRUCTURE
  272.  
  273. size of archive name (2 bytes)
  274. archive name (? bytes)
  275.  
  276. number of files (2 bytes)
  277.  
  278. File #1 name size (2 bytes)
  279. File #1 name (? bytes)
  280. File #1 size (4 bytes)
  281.  
  282. File #2 name size (2 bytes)
  283. File #2 name (? bytes)
  284. File #2 size (4 bytes)
  285.  
  286. ...
  287.  
  288. File #N name size (2 bytes)
  289. File #N name (? bytes)
  290. File #N size (4 bytes)
  291.  
  292.  
  293. codeMap size (2 bytes)
  294.  
  295. codeMap (? bytes):
  296. ascii code (1 byte) - size of code (2 bytes) - code (? bytes)
  297. ascii code (1 byte) - size of code (2 bytes) - code (? bytes)
  298. ...
  299. ascii code (1 byte) - size of code (2 bytes) - code (? bytes)
  300.  
  301. archive content (? bytes)
  302.  
  303. */
  304.  
  305.  
  306. for (int i = 0; i < lastIndex; i++) free(nodes[i]);
  307. fclose(archiveFile);
  308. return 0;
  309. }
  310.  
  311. int extract(char *fileName) {
  312. FILE *archiveFile = NULL;
  313. archiveFile = fopen(fileName, "rb");
  314. if (archiveFile == NULL) printError("ARCHIVE NOT FOUND");
  315.  
  316. // archive size
  317. fseek(archiveFile, 0, SEEK_END);
  318. int archiveSize = ftell(archiveFile);
  319. fseek(archiveFile, 0, SEEK_SET);
  320.  
  321. // archive name
  322. short archiveNameSize;
  323. fread(&archiveNameSize, sizeof(short), 1, archiveFile);
  324. fseek(archiveFile, archiveNameSize, SEEK_CUR);
  325.  
  326. short numberOfFiles;
  327. fread(&numberOfFiles, sizeof(short), 1, archiveFile);
  328.  
  329. // initializing file nameSize, name and size array
  330. short filesNameSize[numberOfFiles];
  331. char filesName[numberOfFiles][2000];
  332. int filesSize[numberOfFiles];
  333.  
  334. // reading file nameSize, name and size array
  335. short currentFileNameSize;
  336. int currentFileSize;
  337. for (int currentFileIndex = 0; currentFileIndex < numberOfFiles; currentFileIndex++) {
  338. fread(&filesNameSize[currentFileIndex], sizeof(short), 1, archiveFile);
  339. fread(&filesName[currentFileIndex], 1, filesNameSize[currentFileIndex], archiveFile);
  340. fread(&filesSize[currentFileIndex], sizeof(int), 1, archiveFile);
  341. }
  342.  
  343.  
  344. for (int i = 0; i < numberOfFiles; i++) printf("%s (%d bytes) \n", filesName[i], filesSize[i]);
  345.  
  346.  
  347. // CODEMAP
  348. short codeMapSize;
  349. fread(&codeMapSize, sizeof(short), 1, archiveFile);
  350. char encodingTable[256][400];
  351. char active[256];
  352. for (int i = 0; i < 256; i++) active[i] = 0;
  353.  
  354. unsigned char currentCodeByte;
  355. short currentCodeSize;
  356.  
  357. for (int i = 0; i < codeMapSize; i++) {
  358. fread(&currentCodeByte, 1, 1, archiveFile);
  359. active[currentCodeByte] = 1;
  360. fread(&currentCodeSize, sizeof(short), 1, archiveFile);
  361. fread(encodingTable[currentCodeByte], 1, currentCodeSize, archiveFile);
  362. }
  363.  
  364. // DECODING
  365. unsigned char currentByte;
  366. char buffer[400];
  367. buffer[0] = '\0';
  368. int bufferPointer = 0;
  369. int byteCount = 0;
  370. int currentSymbol;
  371.  
  372. FILE *currentFile;
  373.  
  374. for (int fileIndex = 0; fileIndex < numberOfFiles; fileIndex++) {
  375. printf("...extracting %s \n", filesName[fileIndex]);
  376. currentFile = fopen(filesName[fileIndex], "wb");
  377. byteCount = 0;
  378.  
  379. while (byteCount < filesSize[fileIndex]) {
  380. currentByte = getc(archiveFile);
  381. for (int bit = 0; bit < 8; bit++) {
  382. buffer[bufferPointer++] = getBit(currentByte, bit);
  383. buffer[bufferPointer] = '\0';
  384.  
  385. if (fileIndex == numberOfFiles - 1 && byteCount == filesSize[fileIndex]) break;
  386.  
  387. for (int code = 0; code < 256; code++) {
  388.  
  389. if (active[code] && strcmp(buffer, encodingTable[code]) == 0) {
  390.  
  391. putc(code, currentFile);
  392. bufferPointer = 0;
  393. buffer[bufferPointer] = '\0';
  394. byteCount++;
  395. break;
  396. }
  397. }
  398. }
  399. }
  400. fclose(currentFile);
  401. }
  402.  
  403. fclose(archiveFile);
  404.  
  405. printf("DONE!\n");
  406. }
  407.  
  408. void list(char *fileName) {
  409. FILE *archiveFile = NULL;
  410. archiveFile = fopen(fileName, "rb");
  411. if (archiveFile == NULL) printError("ARCHIVE NOT FOUND");
  412.  
  413. // archive size
  414. fseek(archiveFile, 0, SEEK_END);
  415. int archiveSize = ftell(archiveFile);
  416. fseek(archiveFile, 0, SEEK_SET);
  417.  
  418. // archive name
  419. short archiveNameSize;
  420. fread(&archiveNameSize, sizeof(short), 1, archiveFile);
  421. char archiveName[archiveNameSize];
  422. fread(archiveName, 1, archiveNameSize, archiveFile);
  423. printf(". . . %s . . . \n\n", archiveName);
  424.  
  425. short numberOfFiles;
  426. fread(&numberOfFiles, sizeof(short), 1, archiveFile);
  427.  
  428. // initializing file nameSize, name and size array
  429. short filesNameSize[numberOfFiles];
  430. char filesName[numberOfFiles][2000];
  431. int filesSize[numberOfFiles];
  432.  
  433. // reading file nameSize, name and size array
  434. short currentFileNameSize;
  435. int currentFileSize;
  436. for (int currentFileIndex = 0; currentFileIndex < numberOfFiles; currentFileIndex++) {
  437. fread(&filesNameSize[currentFileIndex], sizeof(short), 1, archiveFile);
  438. fread(&filesName[currentFileIndex], 1, filesNameSize[currentFileIndex], archiveFile);
  439. fread(&filesSize[currentFileIndex], sizeof(int), 1, archiveFile);
  440.  
  441. printf("> %s (%d bytes)\n\n", filesName[currentFileIndex], filesSize[currentFileIndex]);
  442. }
  443.  
  444. }
  445.  
  446.  
  447. void parseCommand(int argc, char **argv) {
  448. if (argc < 3) printError("Incorrect number of arguments");
  449. if (strcmp(argv[1], "--file") != 0) printError("Incorrect command");
  450.  
  451. if (strcmp(argv[3], "--create") == 0) {
  452. create(argv[2], argc, argv);
  453. return;
  454. }
  455. if (strcmp(argv[3], "--extract") == 0) {
  456. extract(argv[2]);
  457. return;
  458. }
  459. if (strcmp(argv[3], "--list") == 0) {
  460. list(argv[2]);
  461. return;
  462. }
  463.  
  464. printError("Incorrect command");
  465. }
  466.  
  467.  
  468. int main(int argc, char **argv) {
  469.  
  470. parseCommand(argc, argv);
  471.  
  472.  
  473. return 0;
  474. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement