Main Page | Namespace List | Class Hierarchy | Class List | Directories | File List | Namespace Members | Class Members | File Members

validate.cpp

Go to the documentation of this file.
00001 /* 
00002    
00003 * FIMI output validator/sorter program
00004 
00005 Contains parts of:
00006 
00007 
00008  * Cryptographic API.
00009  *
00010  * MD5 Message Digest Algorithm (RFC1321).
00011  *
00012  * Derived from cryptoapi implementation, originally based on the
00013  * public domain implementation written by Colin Plumb in 1993.
00014  *
00015  * Copyright (c) Cryptoapi developers.
00016  * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
00017  * 
00018  * This program is free software; you can redistribute it and/or modify it
00019  * under the terms of the GNU General Public License as published by the Free
00020  * Software Foundation; either version 2 of the License, or (at your option) 
00021  * any later version.
00022  *
00023  */
00024 
00025 
#include <getopt.h>
#include <inttypes.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
00033 
/* Short aliases for the fixed-width unsigned integer types. */
typedef uint8_t  u8;    /* single byte */
typedef uint32_t u32;   /* 32-bit word */
typedef uint64_t u64;   /* 64-bit value */
00037 
00038 
00039 
00040 // ---------------------- MD5 CHECKSUM ROUTINE ------------------
00041 
00042 /*
00043 
00044 WORKS ONLY ON LITTLE ENDIAN CPUS
00045 
00046 */
00047 
/*
 * Byte-order conversion hooks.  Both expand to an expression that does
 * nothing, so the 32-bit word at buf is left exactly as it is.
 */
#define __le32_to_cpus(buf)     ((void)0)
#define __cpu_to_le32s(buf)     ((void)0)
00050 
00051 
00052 
/* Sizes used by the MD5 code. */
#define MD5_DIGEST_SIZE         16      /* 4 hash words of 4 bytes each */
#define MD5_HMAC_BLOCK_SIZE     64      /* 16 block words of 4 bytes each */
#define MD5_BLOCK_WORDS         16      /* 32-bit words per input block */
#define MD5_HASH_WORDS          4       /* 32-bit words in the hash state */

/*
 * The four MD5 round functions.  Every argument is parenthesized so that
 * an argument containing a lower-precedence operator (e.g. a | b) is
 * still treated as a single operand.
 */
#define F1(x, y, z)     ((z) ^ ((x) & ((y) ^ (z))))
#define F2(x, y, z)     F1(z, x, y)
#define F3(x, y, z)     ((x) ^ (y) ^ (z))
#define F4(x, y, z)     ((y) ^ ((x) | ~(z)))

/*
 * One MD5 step: add f(x, y, z) and `in` to w, rotate w left by s bits,
 * then add x.  w is modified in place and must be a 32-bit unsigned
 * lvalue; s must be in the range 1..31.
 */
#define MD5STEP(f, w, x, y, z, in, s) \
        ((w) += f(x, y, z) + (in), \
         (w) = ((w) << (s) | (w) >> (32 - (s))) + (x))
00065 
00066 struct md5_ctx {
00067         u32 hash[MD5_HASH_WORDS];
00068         u32 block[MD5_BLOCK_WORDS];
00069         u64 byte_count;
00070 };
00071 
00072 static void md5_transform(u32 *hash, u32 const *in)
00073 {
00074         u32 a, b, c, d;
00075 
00076         a = hash[0];
00077         b = hash[1];
00078         c = hash[2];
00079         d = hash[3];
00080 
00081         MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
00082         MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
00083         MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
00084         MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
00085         MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
00086         MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
00087         MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
00088         MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
00089         MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
00090         MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
00091         MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
00092         MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
00093         MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
00094         MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
00095         MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
00096         MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
00097 
00098         MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
00099         MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
00100         MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
00101         MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
00102         MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
00103         MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
00104         MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
00105         MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
00106         MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
00107         MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
00108         MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
00109         MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
00110         MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
00111         MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
00112         MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
00113         MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
00114 
00115         MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
00116         MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
00117         MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
00118         MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
00119         MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
00120         MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
00121         MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
00122         MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
00123         MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
00124         MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
00125         MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
00126         MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
00127         MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
00128         MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
00129         MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
00130         MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
00131 
00132         MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
00133         MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
00134         MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
00135         MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
00136         MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
00137         MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
00138         MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
00139         MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
00140         MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
00141         MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
00142         MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
00143         MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
00144         MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
00145         MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
00146         MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
00147         MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
00148 
00149         hash[0] += a;
00150         hash[1] += b;
00151         hash[2] += c;
00152         hash[3] += d;
00153 }
00154 
00155 /* XXX: this stuff can be optimized */
00156 static inline void le32_to_cpu_array(u32 *buf, unsigned int words)
00157 {
00158         while (words--) {
00159                 __le32_to_cpus(buf);
00160                 buf++;
00161         }
00162 }
00163 
00164 static inline void cpu_to_le32_array(u32 *buf, unsigned int words)
00165 {
00166         while (words--) {
00167                 __cpu_to_le32s(buf);
00168                 buf++;
00169         }
00170 }
00171 
00172 static inline void md5_transform_helper(struct md5_ctx *ctx)
00173 {
00174         le32_to_cpu_array(ctx->block, sizeof(ctx->block) / sizeof(u32));
00175         md5_transform(ctx->hash, ctx->block);
00176 }
00177 
00178 static void md5_init(void *ctx)
00179 {
00180         struct md5_ctx *mctx = (struct md5_ctx *)ctx;
00181 
00182         mctx->hash[0] = 0x67452301;
00183         mctx->hash[1] = 0xefcdab89;
00184         mctx->hash[2] = 0x98badcfe;
00185         mctx->hash[3] = 0x10325476;
00186         mctx->byte_count = 0;
00187 }
00188 
00189 static void md5_update(void *ctx, const u8 *data, unsigned int len)
00190 {
00191         struct md5_ctx *mctx = (struct md5_ctx *)ctx;
00192         const u32 avail = sizeof(mctx->block) - (mctx->byte_count & 0x3f);
00193 
00194         mctx->byte_count += len;
00195 
00196         if (avail > len) {
00197                 memcpy((char *)mctx->block + (sizeof(mctx->block) - avail),
00198                        data, len);
00199                 return;
00200         }
00201 
00202         memcpy((char *)mctx->block + (sizeof(mctx->block) - avail),
00203                data, avail);
00204 
00205         md5_transform_helper(mctx);
00206         data += avail;
00207         len -= avail;
00208 
00209         while (len >= sizeof(mctx->block)) {
00210                 memcpy(mctx->block, data, sizeof(mctx->block));
00211                 md5_transform_helper(mctx);
00212                 data += sizeof(mctx->block);
00213                 len -= sizeof(mctx->block);
00214         }
00215 
00216         memcpy(mctx->block, data, len);
00217 }
00218 
00219 static void md5_final(void *ctx, u8 *out)
00220 {
00221         struct md5_ctx *mctx = (struct md5_ctx *)ctx;
00222         const unsigned int offset = mctx->byte_count & 0x3f;
00223         char *p = (char *)mctx->block + offset;
00224         int padding = 56 - (offset + 1);
00225 
00226         *p++ = 0x80;
00227         if (padding < 0) {
00228                 memset(p, 0x00, padding + sizeof (u64));
00229                 md5_transform_helper(mctx);
00230                 p = (char *)mctx->block;
00231                 padding = 56;
00232         }
00233 
00234         memset(p, 0, padding);
00235         mctx->block[14] = mctx->byte_count << 3;
00236         mctx->block[15] = mctx->byte_count >> 29;
00237         le32_to_cpu_array(mctx->block, (sizeof(mctx->block) -
00238                           sizeof(u64)) / sizeof(u32));
00239         md5_transform(mctx->hash, mctx->block);
00240         cpu_to_le32_array(mctx->hash, sizeof(mctx->hash) / sizeof(u32));
00241         memcpy(out, mctx->hash, sizeof(mctx->hash));
00242         memset(mctx, 0, sizeof(*mctx));
00243 }
00244 
00245 
00246 //--------------- END OF MD5 CHECKSUM ROUTINE ------------------
00247 
00248 
00249 #include <vector>
00250 using namespace std;
00251 #include <algorithm>
00252 
/*
 * Print the command-line help to stderr and terminate with exit status 1.
 * s is the program name shown in the usage line.
 */
void usage(char *s) {
  fprintf(stderr,
          "Usage: %s [-h] infilename\n"
          "Computes a checksum from the FIMI output file infilename\n"
          "-h prints this help text\n",
          s);
  exit(1);
}
00259 
00260 char *infilename;
00261 
00262 void parseparams(int argc, char *argv[]) {
00263   char ch;
00264   while ((ch=getopt(argc, argv, "h"))>=0) {
00265     switch(ch) {
00266     case 'h':
00267       usage(argv[0]);
00268       break;
00269     case '?':
00270       fprintf(stderr,"unknown option %c\n",optopt);
00271       usage(argv[0]);
00272       break;
00273     default:
00274       fprintf(stderr,"unknown option %c\n",ch);
00275       usage(argv[0]);
00276       break;
00277     }      
00278   }
00279   if (optind>=argc) {
00280     usage(argv[0]);
00281   }
00282 }
00283 
00284 
/* True when c is one of the ASCII decimal digits. */
static bool fimi_is_digit(char c) {
  return (c >= '0') && (c <= '9');
}

/*
 * Read the run of decimal digits at pos into value and advance pos past it.
 * Returns false, leaving value untouched, if the number does not fit in an
 * int.  An empty run yields 0.
 */
static bool fimi_read_decimal(char *&pos, int &value) {
  int acc = 0;
  while (fimi_is_digit(*pos)) {
    const int digit = *pos - '0';
    if (acc > (INT_MAX - digit) / 10) {
      return false;   // acc*10 + digit would overflow
    }
    acc = acc * 10 + digit;
    ++pos;
  }
  value = acc;
  return true;
}

/*
 * Parse one line of FIMI output: zero or more item numbers separated by
 * spaces, followed by the support in parentheses and a newline, e.g.
 * "3 1 2 (45)\n".
 *
 * supp:    receives the support; written only when the line is valid.
 * itemset: cleared, then filled with the items in the order they appear.
 * linebuf: the NUL-terminated line, including its trailing newline.
 *
 * Returns true on success.  On a malformed line (unexpected character,
 * missing support, number too large for an int, missing newline) a
 * message is written to stderr and false is returned.
 */
bool parse_fimi_line(int & supp, vector<int> &itemset, char *linebuf) {
  char *pos = linebuf;
  itemset.clear();

  // Item list: runs until the opening parenthesis, a newline or the end.
  while ((*pos) && (*pos != '(') && (*pos != '\n')) {
    if (!fimi_is_digit(*pos)) {
      fprintf(stderr,"Input file format error: Expecting number, got %c Erroneous line:\n%s",*pos,linebuf);
      return false;
    }
    int item = 0;
    if (!fimi_read_decimal(pos, item)) {
      fprintf(stderr,"Input file format error: Number too large. Erroneous line:\n%s",linebuf);
      return false;
    }
    itemset.push_back(item);
    while (*pos == ' ') {
      ++pos;
    }
  }

  if (*pos != '(') {
    fprintf(stderr,"Input file format error. Expecting (, got %c. Erroneous line:\n%s",*pos,linebuf);
    return false;
  }
  ++pos;

  // Support: at least one digit is required between the parentheses.
  if (!fimi_is_digit(*pos)) {
    fprintf(stderr,"Input file format error: Expecting number, got %c Erroneous line:\n%s",*pos,linebuf);
    return false;
  }
  int support = 0;
  if (!fimi_read_decimal(pos, support)) {
    fprintf(stderr,"Input file format error: Number too large. Erroneous line:\n%s",linebuf);
    return false;
  }

  if (*pos != ')') {
    fprintf(stderr,"Input file format error. Expecting ), got %c. Erroneous line:\n%s",*pos,linebuf);
    return false;
  }
  ++pos;
  if (*pos != '\n') {
    fprintf(stderr,"Input file format error. Expecting newline, got %c. Erroneous line:\n%s",*pos,linebuf);
    return false;
  }

  supp = support;
  return true;
}
00328 
00329 void printsum(uint32_t* sum) {
00330   static char* hexdigits="0123456789abcdef";
00331   for(size_t i=0;i<MD5_HASH_WORDS;++i) {
00332     for (int q=7;q>=0;--q) {
00333       int c=sum[i];
00334       c>>=(q*4);
00335       c&=15;
00336       putchar(hexdigits[c]);
00337     }
00338   }
00339 }
00340 
00341 bool checksumfile(char *name) {
00342   char buffer[128*1024];
00343   FILE *fin = fopen(name,"r");
00344   if (!fin) {
00345     perror("open");
00346     return false;
00347   }
00348   setbuffer(fin,buffer,sizeof(buffer));
00349   struct md5_ctx ctx;
00350   uint32_t finalsum1[MD5_HASH_WORDS];
00351   uint32_t finalsum2[MD5_HASH_WORDS];
00352   bzero(finalsum1,MD5_HASH_WORDS*sizeof(int));
00353   bzero(finalsum2,MD5_HASH_WORDS*sizeof(int));
00354   char linebuf[10000];
00355   vector<int> itemset;
00356   int supp;
00357   int lines=0;
00358   while(!feof(fin)) {
00359     uint32_t currsum[MD5_HASH_WORDS];
00360     if (!fgets(linebuf,sizeof(linebuf),fin)) {
00361       break;
00362     }
00363     parse_fimi_line(supp,itemset,linebuf);
00364     md5_init(&ctx);
00365     md5_update(&ctx,(u8*)&supp,sizeof(supp));
00366     sort(itemset.begin(),itemset.end());
00367     md5_update(&ctx,(u8*)&(itemset[0]),itemset.size()*sizeof(int));
00368     md5_final(&ctx,(u8*)currsum);
00369     for(size_t i=0;i<MD5_HASH_WORDS;++i) {
00370       finalsum1[i]^=currsum[i];
00371       finalsum2[i]+=currsum[i];
00372     }
00373     lines++;
00374   }
00375   for (int i=0;i<MD5_HASH_WORDS;++i) {
00376     finalsum1[i]^=finalsum2[i];
00377   }
00378   printsum(finalsum1);
00379   printf(" (%d lines) %s\n",lines,name);
00380   return true;
00381 }
00382 
00383 
00384 int main(int argc, char *argv[]) {
00385   parseparams(argc,argv);
00386   while(optind<argc){
00387     if (!checksumfile(argv[optind++])) {
00388       exit(1);
00389     }
00390   }
00391   return 0;
00392 }

Generated on Sun Sep 17 17:50:40 2006 for FIM environment by  doxygen 1.4.4