Click here to Skip to main content
15,035,723 members
Please Sign up or sign in to vote.
1.00/5 (2 votes)
data compression pro

C#
#include<stdio.h>
#include<conio.h>
#include<string.h>
#include<iostream.h>
void fsize(FILE *fp);
void fcal(FILE *fp);
void fa_z(FILE *fp);
char r;   /* scratch byte filled by the fscanf("%c") reads in fsize() and fcal() */
int c;    /* current input character; int (not the invalid "long char") so it can hold EOF from fgetc() */
/*
 * Program entry point: opens "file name.txt" and runs the three reports
 * (character/word/line counts, file size, per-letter counts) over it,
 * rewinding the stream between passes.
 *
 * Returns 0 on success, 1 if the input file cannot be opened.
 */
int main(void)
{
    FILE *fp;

    clrscr();

    fp=fopen("file name.txt","r");
    if(fp==NULL)
    {
        /* Without this check a missing file would hand NULL to every report. */
        perror("file name.txt");
        getch();
        return 1;
    }

    fcal(fp);
    rewind(fp);
    fsize(fp);
    rewind(fp);
    fa_z(fp);
    fclose(fp);
    getch();   /* conio: wait for a key press before exiting */
    return 0;
}

/*
 * Prints the size of the file behind fp.
 *
 * Reads from the current position to end-of-file and reports the resulting
 * stream offset, so the caller should rewind fp first. fp is left at
 * end-of-file. Does nothing if fp is NULL; reports an error on stderr if
 * the offset cannot be obtained.
 */
void fsize(FILE *fp)
{
    long size;

    if(fp==NULL)
        return;

    /* Consume the rest of the stream; only the final offset matters. */
    while(fgetc(fp)!=EOF)
        ;

    size=ftell(fp);
    if(size<0)
    {
        perror("ftell");
        return;
    }

    /* size is a long, so it must be printed with %ld rather than %d. */
    printf("\n\n the size of file=%ld\n",size);
}

/*
 * Echoes the rest of fp to stdout and then prints three totals:
 *   - characters: every byte except space, tab and newline;
 *   - words: runs of bytes delimited by space, tab, newline, '.' or ',';
 *   - lines: newline-terminated lines, plus a final unterminated line.
 *
 * An empty stream reports 0 for all three. fp is left at end-of-file.
 * Does nothing if fp is NULL.
 */
void fcal(FILE *fp)
{
    int ch=0,word=0,line=0;
    int in_word=0;      /* non-zero while the scan is inside a word */
    int last='\n';      /* previous byte; '\n' also stands for "nothing read yet" */
    int cur;            /* int, not char, so EOF stays distinguishable */

    if(fp==NULL)
        return;

    while((cur=fgetc(fp))!=EOF)
    {
        int is_space=(cur=='\n'||cur=='\t'||cur==' ');
        int is_sep=(is_space||cur=='.'||cur==',');

        putchar(cur);

        if(!is_space)
            ch++;
        if(cur=='\n')
            line++;

        /* Count a word once, on its first byte, so that consecutive
           separators such as ", " do not inflate the total. */
        if(is_sep)
            in_word=0;
        else if(!in_word)
        {
            in_word=1;
            word++;
        }

        last=cur;
    }

    /* A last line without a trailing newline still counts as a line. */
    if(last!='\n')
        line++;

    printf("\n\n total character=%d",ch);
    printf("\n\n total word =%d",word);
    printf("\n\n total line=%d",line);
}
/*
 * Counts how often each lowercase letter 'a'..'z' occurs in the rest of fp
 * and prints the 26 totals on one line as "a=N<TAB>b=N<TAB>...".
 *
 * Uppercase letters and all other bytes are ignored. fp is left at
 * end-of-file. Does nothing if fp is NULL.
 */
void fa_z(FILE *fp)
{
    /* Looking letters up in this table avoids assuming that their
       character codes are contiguous. */
    static const char letters[]="abcdefghijklmnopqrstuvwxyz";
    int n[26]={0};
    int ch;             /* int, not char, so EOF stays distinguishable */
    int i;

    if(fp==NULL)
        return;

    while((ch=fgetc(fp))!=EOF)
    {
        /* Search only the 26 letters, never the terminating '\0'. */
        const char *hit=memchr(letters,ch,26);

        if(hit!=NULL)
            n[hit-letters]++;
    }

    printf("\n");
    for(i=0;i<26;i++)
    {
        printf("%c=%d\t",letters[i],n[i]);
    }
}


This program reports the number of characters, words, and lines in a file, as well as the file's size,
and it also shows how many times each letter a-z occurs in the file.
However, it does not yet reduce the file size with lossless compression.

How can I improve it so that it does?
Posted
Updated 15-Mar-15 0:36am
v3

1) replace your if-else-cascade in void fa_z(FILE *fp) with a switch-statement.
2) replace the switch-statement in void fa_z(FILE *fp) with an array or dictionary* and no case-distinction at all.
3) sort the result of void fa_z(FILE *fp) by frequency.
4) build a Huffman tree from the frequency results.
5) iterate over the input file byte by byte and build a compressed output stream by replacing each input byte with the bit sequence obtained by walking the Huffman tree to the corresponding input-byte value.
6) save the Huffman tree along with the compressed output stream in a file.
7) build the decompression-function.

Edit:

* : and use the value of the input-byte as the array-index / as the dictionary-key instead of having manually assigned indices for all different input-byte-values.

The duplicate replacement suggestion of points 1+2 isn't meant seriously. But you should use switch-statements (not in this case!) instead of very long if-else if-statements whenever the predicate is suitable for the switch-statement.

Further reading:
http://en.wikipedia.org/wiki/Huffman_coding[^]
https://www.siggraph.org/education/materials/HyperGraph/video/mpeg/mpegfaq/huffman_tutorial.html[^]

Another Edit:
Solution 2 demonstrates what I meant by the note above marked with *
   
v4
You can start with replacing the while loop in
C++
void fa_z(FILE *fp)
with

C++
int index = 0;
// Count each lowercase letter read from fp; every other byte is skipped.
while ((c = fgetc(fp)) != EOF)
{
    if (c >= 'a' && c <= 'z')
    {
        // The offset from 'a' selects the slot: 'a' -> 0, 'b' -> 1, ...
        // NOTE(review): 'z' -> 25 only if letter codes are contiguous (true for ASCII) - confirm for the target platform.
        index = (int)(c - 'a');
        n[index]++;
    }
}


Assuming c is an int and the array n has been initialized to 0.
   

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)




CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900