1. File Operations in C

    GNU C Library Reference Table of Contents

    C Tutorial

    Copy a file from standard in to standard out byte by byte. #include <stdio.h> int main() { int c; while ((c=fgetc(stdin))!=EOF) fputc(c,stdout); }

    formatted output: #include <stdio.h> int main () { int i=999; double x= 99.9; char string[] = "Hello World"; char byte = 'A'; printf("i=%d x=%f string=%s byte=%c\m", i,x,string,byte); return 0; }

    formatted input: #include <stdio.h> int main() { int i; double x; string[128] char byte; scanf("%d",&i); scanf("%lf",&x); scanf("%c",&byte); scanf("%s",string); return 0; }

    Copy a file from standard in to standard out eliminating multiple blanks: #include <stdio.h> int main() { int flg=0,c; while((c=fgetc(stdin))!=EOF) { if (c!=' ') { fputc(c,stdout); flg=0; } else { if (flg) continue; fputc(c,stdout); flg=1; } } }

    Read lines from standard in and write them to standard out: #include <stdio.h> int main () { char buf[128]; while (fgets(buf,128,stdin)!=NULL) fputs(buf,stdout); }

    Open two files and copy one to the other: #include <stdio.h> #include <stdlib.h> int main() { char fname1[128],fname2[128]; FILE *file1,*file2; int c; printf("Enter file 1 name: "); if(fgets(fname1,128,stdin)==NULL) return EXIT_FAILURE; if (strlen(fname1)>1) fname1[strlen(fname1)-1]='\0'; else { printf("Bad file name 1\n"); return EXIT_FAILURE;} } printf("Enter file 2 name: "); if(fgets(fname2,128,stdin)==NULL) return EXIT_FAILURE; if (strlen(fname2)>1) fname2[strlen(fname2)-1]='\0'; else { printf("Bad file name 1\n"); return EXIT_FAILURE;} } file1=fopen(fname1,"r"); file2=fopen(fname2,"w"); if (file1==NULL) { printf("Error on file 1\n"); return EXIT_FAILURE; } if (file2==NULL) { printf("Error on file 1\n"); return EXIT_FAILURE; } while ((c=fgetc(file1))!=EOF) fputc(c,file2); fclose(file2); return EXIT_SUCCESS;

    The following is a small program that opens a pre-existing file of 128 byte records. It accesses the current position in the file then reads a record (thus advancing thru the file). It checks to see if the record read contains ABC in the first three positions. If so, it replaces these with abc, repositions the file back to where it was when it read the record and writes out the modified record. #define _FILE_OFFSET_BITS 64 #define _LARGE_FILE_SUPPORT #include <stdio.h> #include <stdlib.h> int main() { char buf[128]; FILE *file1; fpos_t fptr; file1=fopen("myfile.dat,"rb+"); // open for read, binary, update (+) if (file1==NULL) { printf("file error\n"); return EXIT_FAILURE; } while (1) { fgetpos(file1,&fptr); // get current file position if (fread(buf,128,1,file1)==0) break; // read a record if ( strncmp(buf,"ABC",3)==0) { // test first 3 chars fsetpos(file1,&fptr); // reposition file buf[0]='a; buf[1]='b'; buf[2]='c'; fwrite(buf,128,1,file1); // re-write record. } } fclose(file1); return EXIT_SUCCESS; }

    Read in a file of data input and write the data to a structured disk file. The data consists of lines with comma delimited fields containing: nameLast,nameFirst,telephone-number #include <stdio.h> #include <stdlib.h> #define NAME_SIZE 32 #define TEL_SIZE 15 #define FNAME 128 #define BUF_SIZE 128 struct Rec { char nameLast[NAMES_SIZE]; char nameFirst[NAMES_SIZE]; char Tel[TEL_SIZE]; }; int main() { char fname1[FNAME],fname2[FNAME]; FILE *file1,*file2; char buf[BUF_SIZE]; char *p1; struct Rec record; printf("Enter file 1 name: "); if(fgets(fname1,FNAME,stdin)==NULL) return EXIT_FAILURE; if (strlen(fname1)>1) fname1[strlen(fname1)-1]='\0'; else { printf("Bad file name 1\n"); return EXIT_FAILURE;} } printf("Enter file 2 name: "); if(fgets(fname2,FNAME,stdin)==NULL) return EXIT_FAILURE; if (strlen(fname2)>1) fname2[strlen(fname2)-1]='\0'; else { printf("Bad file name 1\n"); return EXIT_FAILURE;} } file1=fopen(fname1,"r"); file2=fopen(fname2,"w"); if (file1==NULL) { printf("Error on file 1\n"); return EXIT_FAILURE; } if (file2==NULL) { printf("Error on file 2\n"); return EXIT_FAILURE; } while (fgets(buf,BUF_SIZE,stdin)!=NULL) { p1=strtok(buf,","); if (p1==NULL || strlen(p1)>NAME_SIZE-1) { printf("Error on input\n"); return EXIT_FAILURE; } strcpy(record.nameLast,p1); p1=strtok(NULL,","); if (p1==NULL || strlen(p1)>NAME_SIZE-1) { printf("Error on input\n"); return EXIT_FAILURE; } strcpy(record.nameFirst,p1); p1=strtok(NULL,","); if (p1==NULL || strlen(p1)>TEL_SIZE-1) { printf("Error on input\n"); return EXIT_FAILURE; } strcpy(record.Tel,p1); fwrite(&record,sizeof(struct Rec),1,file2); } fclose(file2); return EXIT_SUCCESS; }

    Write a program to read in a last name then search the file created in the above for all records with that last name. #include <stdio.h> #include <stdlib.h> #define NAME_SIZE 32 #define TEL_SIZE 15 #define FNAME 128 #define BUF_SIZE 128 struct Rec { char nameLast[NAMES_SIZE]; char nameFirst[NAMES_SIZE]; char Tel[TEL_SIZE]; }; int main() { char lastName[NAME_SIZE],fname2[FNAME]; FILE *file1; char buf[BUF_SIZE]; char *p1; struct Rec record; printf("Enter last name: "); if(fgets(lastName,NAME_SIZE,stdin)==NULL) return EXIT_FAILURE; if (strlen(lastName)>1) lastName[strlen(lastName)-1]='\0'; else { printf("Bad name 1\n"); return EXIT_FAILURE;} } printf("Enter file name: "); if(fgets(fname2,FNAME,stdin)==NULL) return EXIT_FAILURE; if (strlen(fname2)>1) fname2[strlen(fname2)-1]='\0'; else { printf("Bad file name 1\n"); return EXIT_FAILURE;} } file1=fopen(fname2,"r"); if (file1==NULL) { printf("Error on file 1\n"); return EXIT_FAILURE; } while ( fread(&record,sizeof(Rec),1,file1) !=0 ) { if( strcmp(lastName,record.nameLast)==0) printf("%s %s %s\n",record.nameLast,record.nameFirst,record.Tel); } return EXIT_SUCCESS; }

    The following is a dumb program that opens a pre-existing file of 128 byte records. It accesses the current position in the file then reads a record (thus advancing thru the file). It checks to see if the record read contains ABC in the first three positions. If so, it replaces these with abc, repositions the file back to where it was when it read the record and writes out the modified record. The purpose of this example is to show 64 bit file addressing and file repositioning. #define _FILE_OFFSET_BITS 64 #define _LARGE_FILE_SUPPORT #include <stdio.h> #include <stdlib.h> int main() { char buf[128]; FILE *file1; fpos_t fptr; file1=fopen("myfile.dat,"rb+"); // open for read, binary, update (+) if (file1==NULL) { printf("file error\n"); return EXIT_FAILURE; } while (1) { fgetpos(file1,&fptr); // get current file position if (fread(buf,128,1,file1)==0) break; // read a record if ( strncmp(buf,"ABC",3)==0) { // test first 3 chars fsetpos(file1,&fptr); // reposition file buf[0]='a; buf[1]='b'; buf[2]='c'; fwrite(buf,128,1,file1); // re-write record. } } fclose(file1); return EXIT_SUCCESS; }

    Re-write the pervious telephone program so that first name and last name and a new telephone number are read and the new telephone number updates the data base. #define _FILE_OFFSET_BITS 64 #define _LARGE_FILE_SUPPORT #include <stdio.h> #include <stdlib.h> #define NAME_SIZE 32 #define TEL_SIZE 15 #define FNAME 128 #define BUF_SIZE 128 struct Rec { char nameLast[NAMES_SIZE]; char nameFirst[NAMES_SIZE]; char Tel[TEL_SIZE]; }; int main() { char firstName[NAME_SIZE],telnbr[TEL_SIZE],lastName[NAME_SIZE],fname2[FNAME]; FILE *file1; char buf[BUF_SIZE]; char *p1; struct Rec record; fpos_t fptr; printf("Enter last name: "); if(fgets(lastName,NAME_SIZE,stdin)==NULL) return EXIT_FAILURE; if (strlen(lastName)>1) lastName[strlen(lastName)-1]='\0'; else { printf("Bad name 1\n"); return EXIT_FAILURE;} } printf("Enter first name: "); if(fgets(firstName,NAME_SIZE,stdin)==NULL) return EXIT_FAILURE; if (strlen(firstName)>1) lastName[strlen(firstName)-1]='\0'; else { printf("Bad name 1\n"); return EXIT_FAILURE;} } printf("Enter new phone number: "); if(fgets(telnbr,TEL_SIZE,stdin)==NULL) return EXIT_FAILURE; if (strlen(telnbr)>1) telnbr[strlen(telnbr)-1]='\0'; else { printf("Bad name 1\n"); return EXIT_FAILURE;} } printf("Enter file name: "); if(fgets(fname2,FNAME,stdin)==NULL) return EXIT_FAILURE; if (strlen(fname2)>1) fname2[strlen(fname2)-1]='\0'; else { printf("Bad file name 1\n"); return EXIT_FAILURE;} } file1=fopen(fname2,"r+"); if (file1==NULL) { printf("Error on file 1\n"); return EXIT_FAILURE; } while (1) { fgetpos(file1,&fptr); // get current file position fread(&record,sizeof(Rec),1,file1) !=0 ) { if( strcmp(lastName,record.nameLast)==0 && strcmp(firstName,record.nameFirst) { fsetpos(file1,&fptr); // reposition file strcpy(record.Tel,telnbr); fwrite(&record,sizeof(struct Rec),1,file1); printf("updates: %s %s %s\n",record.nameLast,record.nameFirst,record.Tel); return EXIT_SUCCESS; } } printf("Record not found\n"); return EXIT_FAILURE; }

    Write a program to load the phone number data base and create an index file to in so you can look up phone numbers by last name. #define _FILE_OFFSET_BITS 64 #define _LARGE_FILE_SUPPORT #include <stdio.h> #include <stdlib.h> #include <string.h> #define NAME_SIZE 32 #define TEL_SIZE 15 #define FNAME 128 #define BUF_SIZE 128 struct Rec { char nameLast[NAME_SIZE]; char nameFirst[NAME_SIZE]; char Tel[TEL_SIZE]; }; struct INDX { char nameLast[NAME_SIZE]; char nameFirst[NAME_SIZE]; fpos_t offset; }; int main() { char fname1[FNAME],fname2[FNAME]; FILE *file1,*file2,*indx; char buf[BUF_SIZE]; char *p1; struct Rec record; struct INDX irec; printf("Enter input file name: "); if(fgets(fname1,FNAME,stdin)==NULL) return EXIT_FAILURE; if (strlen(fname1)>1) fname1[strlen(fname1)-1]='\0'; else { printf("Bad file name 1\n"); return EXIT_FAILURE; } printf("Enter data file name: "); if(fgets(fname2,FNAME,stdin)==NULL) return EXIT_FAILURE; if (strlen(fname2)>1) fname2[strlen(fname2)-1]='\0'; else { printf("Bad file name 1\n"); return EXIT_FAILURE; } file1=fopen(fname1,"r"); file2=fopen(fname2,"w"); indx=fopen("index","w"); if (file1==NULL) { printf("Error on input file\n"); return EXIT_FAILURE; } if (file2==NULL) { printf("Error on data file\n"); return EXIT_FAILURE; } if (indx==NULL) { printf("Error on index file\n"); return EXIT_FAILURE; } while (fgets(buf,BUF_SIZE,file1)!=NULL) { // read input file buf[strlen(buf)-1] = '\0'; // chop new line p1=strtok(buf,","); // tokens will be delimited by commas if (p1==NULL || strlen(p1)>NAME_SIZE-1) { printf("Error on input\n"); return EXIT_FAILURE; } strcpy(record.nameLast,p1); p1=strtok(NULL,","); if (p1==NULL || strlen(p1)>NAME_SIZE-1) { printf("Error on input\n"); return EXIT_FAILURE; } strcpy(record.nameFirst,p1); p1=strtok(NULL,","); if (p1==NULL || strlen(p1)>TEL_SIZE-1) { printf("Error on input\n"); return EXIT_FAILURE; } strcpy(record.Tel,p1); fgetpos(file2,&irec.offset); // get current file position fwrite(&record,sizeof(struct Rec),1,file2); strcpy(irec.nameLast,record.nameLast); strcpy(irec.nameFirst,record.nameFirst); fwrite(&irec,sizeof(struct INDX),1,indx); } fclose(file1); fclose(file2); fclose(indx); file2=fopen(fname2,"r"); if (file2==NULL) { printf("err 2\n"); abort(); } printf("\nContents of master file\n\n"); while (fread(&record,sizeof(struct Rec),1,file2)) { // returns zero when done printf("%s %s %s\n", record.nameLast, record.nameFirst, record.Tel); } printf("\nContents of index file \n\n"); indx=fopen("index","r"); if (indx==NULL) { printf("err 2\n"); abort(); } while (fread(&irec,sizeof(struct INDX),1,indx)) { // returns zero when done printf("%s %s %llX", irec.nameLast, irec.nameFirst, irec.offset ); fsetpos(file2,&irec.offset); // fsetpos wants address of 2nd arg fread(&record,sizeof(struct Rec),1,file2); printf(" --> %s\n", record.Tel); } return EXIT_SUCCESS; }
    Input data file: ARENDS,DEREK,111-1234 CASH,NICHOLAS,222-1234 DANIELS,DEANDRE,333-1234 DELGADO,PHILIP,444-1234 EWING,CHASE,555-1234 HASS,BRANDON,666-1234 HOEKSTRA,CHRISTOPHER,777-1234 JENSON,CHRISTOPHER,888-1234 KAPARTHI,PRIYANKA,999-1234 KAUTEN,MATTHEW,112-1234 MADHABHUSHANAM,VENU,113-1234 NADLER,BERNARD,114-1234 NARDINI,LANDON,115-1234 NIELSEN,ROBERT,116-1234 NILLES,MARI,117-1234 OBRIEN,KYLE,118-1234 PAISLEY,JONATHAN,119-1234 REWERTS,JOSHUA,121-1234 SOLOVYEV,ALEKSANDR,122-1234 STEVENSON,SETH,124-1234 SYNDERGAARD,TONY,125-1234 WEMMIE,MATTHEW,126-1234
    Execution of the above: Enter input file name: phone.dat Enter data file name: phone Contents of master file ARENDS DEREK 111-1234 CASH NICHOLAS 222-1234 DANIELS DEANDRE 333-1234 DELGADO PHILIP 444-1234 EWING CHASE 555-1234 HASS BRANDON 666-1234 HOEKSTRA CHRISTOPHER 777-1234 JENSON CHRISTOPHER 888-1234 KAPARTHI PRIYANKA 999-1234 KAUTEN MATTHEW 112-1234 MADHABHUSHANAM VENU 113-1234 NADLER BERNARD 114-1234 NARDINI LANDON 115-1234 NIELSEN ROBERT 116-1234 NILLES MARI 117-1234 OBRIEN KYLE 118-1234 PAISLEY JONATHAN 119-1234 REWERTS JOSHUA 121-1234 SOLOVYEV ALEKSANDR 122-1234 STEVENSON SETH 124-1234 SYNDERGAARD TONY 125-1234 WEMMIE MATTHEW 126-1234 Contents of index file ARENDS DEREK 0 --> 111-1234 CASH NICHOLAS 4F --> 222-1234 DANIELS DEANDRE 9E --> 333-1234 DELGADO PHILIP ED --> 444-1234 EWING CHASE 13C --> 555-1234 HASS BRANDON 18B --> 666-1234 HOEKSTRA CHRISTOPHER 1DA --> 777-1234 JENSON CHRISTOPHER 229 --> 888-1234 KAPARTHI PRIYANKA 278 --> 999-1234 KAUTEN MATTHEW 2C7 --> 112-1234 MADHABHUSHANAM VENU 316 --> 113-1234 NADLER BERNARD 365 --> 114-1234 NARDINI LANDON 3B4 --> 115-1234 NIELSEN ROBERT 403 --> 116-1234 NILLES MARI 452 --> 117-1234 OBRIEN KYLE 4A1 --> 118-1234 PAISLEY JONATHAN 4F0 --> 119-1234 REWERTS JOSHUA 53F --> 121-1234 SOLOVYEV ALEKSANDR 58E --> 122-1234 STEVENSON SETH 5DD --> 124-1234 SYNDERGAARD TONY 62C --> 125-1234 WEMMIE MATTHEW 67B --> 126-1234 Directory after execution: -rw-r--r-- 1 xxx xxx 1760 Sep 6 10:39 index -rw-r--r-- 1 xxx xxx 1738 Sep 6 10:39 phone -rw-r--r-- 1 xxx xxx 541 Sep 6 09:32 phone.dat

    Script started on Sun Sep 9 10:41:40 2007 user@galway cat bin3.c // Copyright (c) 2007 Kevin C. O'Kane // Example binary tree (unbalanced) on a file system // using ftello(), fseeko(), fread(), fwrite(). /* This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ #define _FILE_OFFSET_BITS 64 #define _LARGE_FILE_SUPPORT #include <stdio.h> #include <stdlib.h> #include <string.h> #define NAME_SIZE 32 #define TEL_SIZE 15 #define FNAME 128 #define BUF_SIZE 128 #define DEBUG struct Rec { char nameLast[NAME_SIZE]; char nameFirst[NAME_SIZE]; char Tel[TEL_SIZE]; off_t left,right; }; int add_rec(FILE *, struct Rec, int); void print_file(FILE * , off_t ); off_t MAX=0; int main() { char fname1[FNAME],fname2[FNAME]; FILE *file1,*file2; char buf[BUF_SIZE]; char *p1; struct Rec record; int first=1; file1=fopen("phone.dat","r"); file2=fopen("phone","w+"); if (file1==NULL) { printf("Error on input file\n"); return EXIT_FAILURE; } if (file2==NULL) { printf("Error on data file\n"); return EXIT_FAILURE; } while (fgets(buf,BUF_SIZE,file1)!=NULL) { // read input file buf[strlen(buf)-1] = '\0'; // chop new line p1=strtok(buf,","); // tokens will be delimited by commas if (p1==NULL || strlen(p1)>NAME_SIZE-1) { printf("Error on input\n"); return EXIT_FAILURE; } strcpy(record.nameLast,p1); p1=strtok(NULL,","); if (p1==NULL || strlen(p1)>NAME_SIZE-1) { printf("Error on input\n"); return EXIT_FAILURE; } strcpy(record.nameFirst,p1); p1=strtok(NULL,","); if (p1==NULL || strlen(p1)>TEL_SIZE-1) { printf("Error on input\n"); return EXIT_FAILURE; } strcpy(record.Tel,p1); record.left=-1; // indicates no children record.right=-1; if (add_rec(file2,record,first)) { fclose (file1); fclose (file2); printf("an error has taken place.\n"); abort(); } first=0; } printf("\n---------------------------------------\n"); print_file(file2,0); fclose(file1); fclose(file2); return EXIT_SUCCESS; } int add_rec(FILE *f, struct Rec r, int first) { struct Rec tmp_rec; off_t off2,off3,off4; char name1[2*NAME_SIZE],name2[2*NAME_SIZE]; int i; strcpy(name1,r.nameLast); // concatenate names into one string strcat(name1,r.nameFirst); if (first) { fwrite(&r,sizeof(struct Rec),1,f); MAX=ftello(f); return 0; } off3=0; // search for record do { fseeko(f,off3,SEEK_SET); off4=off3; // rememeber parent fread(&tmp_rec,sizeof(struct Rec),1,f); strcpy(name2,tmp_rec.nameLast); // concatenate names strcat(name2,tmp_rec.nameFirst); if ( (i=strcmp(name1,name2)) == 0 ) { return 0; // found - bye-bye } if (i<0 ) off3=tmp_rec.left; // move to child else off3=tmp_rec.right; } while (off3 >= 0 ); // not found if < 0 fseeko(f,0,SEEK_END); // where is eof? MAX=ftello(f); if (i<0) tmp_rec.left=MAX; // hang new node on parent else tmp_rec.right=MAX; fseeko(f,off4,SEEK_SET); fwrite(&tmp_rec,sizeof(struct Rec),1,f); // write parent fseeko(f,0,SEEK_END); MAX=ftello(f); fwrite(&r,sizeof(struct Rec),1,f); // write child #ifdef DEBUG printf("add %s %s at %lld \n", r.nameFirst, r.nameLast, MAX); // annoying debug stuff MAX=ftello(f); printf("file size = %lld \n", MAX); #endif return 0; } void print_file(FILE * f, off_t root) { // this could overflow the program stack struct Rec tmp; if (root < 0) return; fseeko(f,root,SEEK_SET); fread(&tmp,sizeof(struct Rec),1,f); if (tmp.left >= 0 ) print_file(f, tmp.left); printf("%s %s %s %lld\n",tmp.nameLast, tmp.nameFirst, tmp.Tel, root); if (tmp.right >= 0 ) print_file(f, tmp.right); return; } user@galway gcc -o bin3 bin3.c user@galway cat makedata.c #include <stdio.h> #include <stdlib.h> #include <time.h> int main() { int i; srand(time(NULL)); // init seed for (i=0; i< 10000; i++) printf("LN%d,FN%d,%d-%d\n", rand()%100000, rand()%100000, rand()%1000, rand()%10000); } user@galway gcc -o makedata makedata.c user@galway makedata > phone.dat user@galway bin3 add FN46431 LN41244 at 96 file size = 192 add FN83179 LN72209 at 192 file size = 288 add FN24259 LN6976 at 288 file size = 384 add FN76458 LN42704 at 384 file size = 480 add FN11980 LN60299 at 480 file size = 576 add FN64811 LN10252 at 576 file size = 672 . . . add FN71317 LN46667 at 959328 file size = 959424 add FN42105 LN11184 at 959424 file size = 959520 add FN76039 LN17943 at 959520 file size = 959616 add FN30210 LN28502 at 959616 file size = 959712 add FN49227 LN47006 at 959712 file size = 959808 add FN70997 LN43532 at 959808 file size = 959904 add FN7703 LN86872 at 959904 file size = 960000 --------------------------------------- LN0 FN58826 272-3685 618240 LN10000 FN6006 742-790 734496 LN10001 FN28376 939-6317 297120 LN10037 FN46208 360-5123 675648 LN10037 FN62869 471-6585 839712 LN1003 FN63787 549-8092 698304 LN10045 FN76965 97-8128 190560 LN10053 FN23445 534-4451 508512 . . . LN99936 FN5754 133-1867 853344 LN99945 FN86768 436-9393 273408 LN99962 FN57209 61-9048 344928 LN99965 FN7044 449-4130 252672 LN99982 FN57851 763-669 683520 LN99994 FN24905 605-8471 810240 LN9999 FN39935 403-3328 173760 user@galway ls -l phone* -rw-r--r-- 1 user None 960000 Sep 9 10:42 phone -rw-r--r-- 1 user None 245485 Sep 9 10:42 phone.dat user@galway exit Script done on Sun Sep 9 10:42:53 2007

    Script started on Thu Sep 13 13:09:53 2007 user@galway cat btree5.c // Example b-tree // using ftello(), fseeko(), fread(), fwrite(). // Copyright (c) 2007 Kevin C. O'Kane /* This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ #define _FILE_OFFSET_BITS 64 #define _LARGE_FILE_SUPPORT #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include <time.h> // -------------- BTREE PARAMETERS --------------------- #define NBR_ITEMS 2000 #define KEY_SIZE 32 #define FNAME 128 #define BUF_SIZE 128 #define TREE_NAME "btree.dat" #define PRINT #define STORE 0 #define RETRIEVE 1 #define DELETE 2 #define CLOSE 3 struct entry { char key[KEY_SIZE]; off_t data; }; // NBR_ITEMS needs to be even - I think? struct block { struct entry item[NBR_ITEMS+1]; off_t pointer[NBR_ITEMS+2]; }; static struct block tmp2; static struct entry up; static off_t loc1; int add_rec(FILE *, off_t , char *, off_t ); struct entry * search(FILE *,char *, off_t); void dump(char *, FILE *f,off_t root); void dump1(char *, FILE *f,struct block); struct entry * Btree(int, char *, off_t); // -------------- BTREE PARAMETERS --------------------- int main() { FILE *input; char buf[BUF_SIZE]; char key[KEY_SIZE]; off_t data; char * p1; time_t t1,t2; t1=time(NULL); input=fopen("btreedata","r"); while (fgets(buf,BUF_SIZE,input)!=NULL) { // read input file buf[strlen(buf)-1] = '\0'; // chop new line #ifdef PRINT printf("add ------------------------> "); puts(buf); #endif p1=strtok(buf,","); // tokens will be delimited by commas if (p1==NULL || strlen(p1)>KEY_SIZE-1) { printf("Error on input\n"); return EXIT_FAILURE; } strcpy(key,p1); p1=strtok(NULL,","); if (p1==NULL || strlen(p1)>KEY_SIZE-1) { printf("Error on input\n"); return EXIT_FAILURE; } sscanf(p1,"%lld",&data); if (Btree(STORE,key,data) == NULL) return EXIT_FAILURE; } printf("BEGIN RETRIEVE PHASE\n"); rewind(input); // go back to start of input file while (fgets(buf,BUF_SIZE,input)!=NULL) { // read input file struct entry *t1; buf[strlen(buf)-1] = '\0'; // chop new line p1=strtok(buf,","); // tokens will be delimited by commas if ( (t1=Btree(RETRIEVE,p1,0)) ==NULL) { printf("not found \n"); return EXIT_FAILURE; } #ifdef PRINT else printf("%s %lld\n",t1->key,t1->data); #endif } Btree(CLOSE,p1,0); printf("time=%d\n",time(NULL)-t1); return EXIT_SUCCESS; } //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ struct entry * Btree(int action, char *key, off_t data) { static FILE * btree = NULL; off_t root; if (action == CLOSE) { if ( btree != NULL ) fclose (btree); btree = NULL; return NULL; } if (action == RETRIEVE) { if ( btree == NULL ) return NULL; return search(btree,key,0); } if (btree == NULL ) { // file not open if ( access(TREE_NAME,F_OK) ) { // new file btree=fopen(TREE_NAME,"w+"); if (btree==NULL) { printf("Error on btree file\n"); return NULL; } root = -1; fwrite(&root,sizeof(off_t),1,btree); // create root record } else { // existing file btree=fopen(TREE_NAME,"r+"); if (btree==NULL) { printf("Error on btree file\n"); return NULL; } } } if (add_rec(btree,0,key,data)) { // 0 means use root // special case - root split and we need to // make a new root off_t root; int j; fseeko(btree,0,SEEK_SET); // old root fread(&root,sizeof(off_t),1,btree); // advances fp for (j=1; j<NBR_ITEMS+1; j++) { // zap it tmp2.pointer[j]=-1; tmp2.item[j].key[0]='\0'; tmp2.item[j].data=-1; } strcpy(tmp2.item[0].key,up.key); // key sent up from below tmp2.item[0].data=up.data; // data sent up from below tmp2.pointer[0]=loc1; // less than child tmp2.pointer[1]=root; // old root block fseeko(btree,0,SEEK_END); // find eof root=ftello(btree); fwrite(&tmp2,sizeof(struct block),1,btree); // write new root fseek(btree,0,SEEK_SET); fwrite(&root,sizeof(off_t),1,btree); // new root } strcpy(up.key,key); up.data=data; return &up; } int add_rec(FILE *f, off_t start, char *key, off_t data) { off_t root,off1,off2,off3; int i,j,k; struct block tmp1; int flg1; loc1=-1; if (start==0) { fseeko(f,0,SEEK_SET); fread(&root,sizeof(off_t),1,f); // advances fp } else root=start; // begin with a block other than root if (root == -1 ) { // new tree - make first block strcpy(tmp1.item[0].key,key); // key tmp1.item[0].data=data; // data pointer tmp1.pointer[0]=-1; // descendent for (i=1; i<NBR_ITEMS+1; i++) { // zero the rest tmp1.item[i].key[0]='\0'; tmp1.item[i].data=-1; tmp1.pointer[i]=-1; } tmp1.pointer[NBR_ITEMS]=-1; // high end down pointer // because start == 0, we are at first byte beyond root record root=ftello(f); fwrite(&tmp1,sizeof(struct block),1,f); // write first block fseek(f,0,SEEK_SET); fwrite(&root,sizeof(off_t),1,f); // new root return 0; } fseeko(f,root,SEEK_SET); fread(&tmp1,sizeof(struct block),1,f); flg1=0; for (i=0; i<NBR_ITEMS; i++) { if ( strlen(tmp1.item[i].key)==0) { flg1=1; // empty key found - end of keys break; } if ( (j=strcmp(key,tmp1.item[i].key)) == 0 ) { // compare keys tmp1.item[i].data=data; // found - just update data pointer fseeko(f,root,SEEK_SET); fwrite(&tmp1,sizeof(struct block),1,f); return 0; // done } if (j>0) continue; // search key greater than recorded key break; // search key less than recorded key // not in this block } if (tmp1.pointer[i]>=0) { // lower block exists - descend if ( add_rec(f,tmp1.pointer[i],key,data)==0 ) // if not 0, a key was sent up return 0; // finished - no key sent up. strcpy(key,up.key); // a split occurred below and this key was sent up data=up.data; // data pointer sent up } // insert into long block - block has one extra slot for (j=NBR_ITEMS; j>=i; j--) { // shift to create opening tmp1.pointer[j]=tmp1.pointer[j-1]; tmp1.item[j]=tmp1.item[j-1]; } tmp1.pointer[i]=loc1; // child ptr - zero or sent from below strcpy(tmp1.item[i].key,key); // key being added tmp1.item[i].data=data; // data being added for (k=0; k<NBR_ITEMS+1; k++) if (strlen(tmp1.item[k].key)==0) break; // find end of block (k) if (k<NBR_ITEMS) { // easy insert - block had space fseeko(f,root,SEEK_SET); fwrite(&tmp1,sizeof(struct block),1,f); return 0; // block ok } // split block - block full strcpy(up.key,tmp1.item[NBR_ITEMS/2].key); // key to be sent up up.data=tmp1.item[NBR_ITEMS/2].data; // data to be sent up // tmp2 will be the low order block resulting from the split for (j=0; j<=NBR_ITEMS/2; j++) { // copy low order data from tmp1 tmp2.pointer[j]=tmp1.pointer[j]; tmp2.item[j]=tmp1.item[j]; // structure copy } for (j=NBR_ITEMS/2+1; j<NBR_ITEMS+1; j++) { // zap the remainder tmp2.pointer[j]=-1; tmp2.item[j].key[0]='\0'; tmp2.item[j].data=-1; } fseeko(f,0,SEEK_END); // advance to endfile and record location loc1=ftello(f); fwrite(&tmp2,sizeof(struct block),1,f); // write low block out // tmp1 is the high order block resulting from the split for (j=0; j<NBR_ITEMS/2; j++) { // shift its contents down to beginning tmp1.pointer[j]=tmp1.pointer[NBR_ITEMS/2+j]; tmp1.item[j]=tmp1.item[NBR_ITEMS/2+j]; } for (j=NBR_ITEMS/2; j<NBR_ITEMS; j++) { // zap its high items tmp1.pointer[j]=-1; tmp1.item[j].key[0]='\0'; tmp1.item[j].data=-1; } tmp1.pointer[NBR_ITEMS/2]=tmp1.pointer[NBR_ITEMS]; // move its high end child ptr tmp1.pointer[NBR_ITEMS]=0; // zap it fseeko(f,root,SEEK_SET); fwrite(&tmp1,sizeof(struct block),1,f); // write high half return 1; // key/data/child ptr being sent up } struct entry * search(FILE * f, char *key, off_t root) { off_t off1,off2,off3; int i,j; static struct block tmp1; int flg1; if (root==0) { fseeko(f,0,SEEK_SET); fread(&root,sizeof(off_t),1,f); // advances fp } fseeko(f,root,SEEK_SET); fread(&tmp1,sizeof(struct block),1,f); flg1=0; for (i=0; i<NBR_ITEMS; i++) { if ( strlen(tmp1.item[i].key)==0) { flg1=1; break; } // empty key if ( (j=strcmp(key,tmp1.item[i].key)) == 0 ) { return &tmp1.item[i]; } if (j>0) continue; break; } if (tmp1.pointer[i]>=0) { // descend - may be high key root=tmp1.pointer[i]; return search(f,key,root); } return NULL; } void dump(char * cap, FILE *f,off_t root) { struct block tmp; int i; fseeko(f,root,SEEK_SET); fread(&tmp,sizeof(struct block),1,f); printf("***dump=%s from block nbr %lld\n",cap,root); for (i=0; i<NBR_ITEMS+1; i++) { printf("%d key=%s %lld %lld\n",i,tmp.item[i].key,tmp.item[i].data,tmp.pointer[i]); } return; } void dump1(char * cap, FILE *f,struct block tmp) { int i; printf("***dump=%s from block ***\n",cap); for (i=0; i<NBR_ITEMS+1; i++) { printf("%d key=%s %lld %lld\n",i,tmp.item[i].key,tmp.item[i].data,tmp.pointer[i]); } return; } user@galway gcc -o btree btree5.c user@galway cat makedata1.c #include <stdio.h> #include <stdlib.h> #include <time.h> int main() { int i; srand(time(NULL)); // init seed for (i=0; i< 100000; i++) printf("KEY%d,%d\n", rand()%100000000, rand()%10000); } user@galway gcc makedata1.c user@galway ./a > btreedata user@galway wc btreedata 100000 100000 1677759 btreedata user@galway rm btree.dat user@galway btree > xxx user@galway tail xxx KEY27633166 5247 KEY31362650 5646 KEY98559545 4918 KEY87499190 3014 KEY15346345 8584 KEY37346962 8745 KEY8143336 8681 KEY77780524 6016 KEY22495053 9573 time=504 user@galway tail btreedata KEY10106054,4146 KEY27633166,5247 KEY31362650,5646 KEY98559545,4918 KEY87499190,3014 KEY15346345,8584 KEY37346962,8745 KEY8143336,8681 KEY77780524,6016 KEY22495053,9573 user@galway exit Script done on Thu Sep 13 13:19:34 2007

    The following is a bit absurd since each block points to its immediate successor in the file but it nonetheless illustrates the technique. Your program will have multiple lists each with their own starting points. The idea, however, is the same. Script started on Tue Sep 25 20:53:34 2007 user@galway cat list.c // Example list on disk // using ftello(), fseeko(), fread(), fwrite(). // Copyright (c) 2007 Kevin C. O'Kane /* This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ #define _FILE_OFFSET_BITS 64 #define _LARGE_FILE_SUPPORT #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #define BLKSIZE 10 #define KEY_SIZE 32 #define FNAME 128 #define BUF_SIZE 128 #define PRINT struct dblock { off_t next; int count; off_t data[BLKSIZE]; }; int List(off_t, FILE *); int main() { FILE *input, *lst; char buf[BUF_SIZE]; char key[KEY_SIZE]; off_t data,start; char * p1; input=fopen("btreedata","r"); lst=fopen("list","w+"); data=8; start=-1; fwrite(&data,sizeof(off_t),1,lst); if (input==NULL || lst ==NULL) return EXIT_FAILURE; while (fgets(buf,BUF_SIZE,input)!=NULL) { // read input file buf[strlen(buf)-1] = '\0'; // chop new line #ifdef PRINT printf("add ------------------------> "); puts(buf); #endif p1=strtok(buf,","); // tokens will be delimited by commas if (p1==NULL || strlen(p1)>KEY_SIZE-1) { printf("Error on input\n"); return EXIT_FAILURE; } strcpy(key,p1); p1=strtok(NULL,","); if (p1==NULL || strlen(p1)>KEY_SIZE-1) { printf("Error on input\n"); return EXIT_FAILURE; } sscanf(p1,"%lld",&data); if (List(data,lst) == EXIT_FAILURE) return EXIT_FAILURE; } } int List(off_t store, FILE *lst) { struct dblock rec; static off_t start = -1; off_t t1,t2; int i; if (start == -1) { for (i=1; i<BLKSIZE; i++) rec.data[i]=-1; rec.data[0]=store; rec.next=-1; rec.count=1; fseeko(lst,8,SEEK_SET); fwrite(&rec,sizeof(struct dblock),1,lst); start=ftello(lst); fseeko(lst,0,SEEK_SET); fwrite(&start,sizeof(off_t),1,lst); // next available byte printf(" *added to block %lld\n",start); return EXIT_SUCCESS; } start=8; while(1) { // find a block with space fseeko(lst,start,SEEK_SET); fread(&rec,sizeof(struct dblock),1,lst); if (rec.count<BLKSIZE) { // this block has space rec.data[rec.count]=store; rec.count++; fseeko(lst,start,SEEK_SET); fwrite(&rec,sizeof(rec),1,lst); printf(" added to existing block %lld\n",start); return EXIT_SUCCESS; } if (rec.next>0) { // this block has a successor start=rec.next; continue; // get nect block } // create a new block and attach it to previous t1=start; // where we are now fseeko(lst,0,SEEK_SET); fread(&t2,sizeof(off_t),1,lst); // find end of file fseeko(lst,t2,SEEK_SET); // move there for (i=1; i<BLKSIZE; i++) rec.data[i]=-1; // zap a block rec.data[0]=store; rec.next=-1; rec.count=1; fwrite(&rec,sizeof(struct dblock),1,lst); // write new block start=ftello(lst); // rememebr were we are fseeko(lst,t1,SEEK_SET); return to previous block fread(&rec,sizeof(struct dblock),1,lst); rec.next=t2; // its next pointer fseeko(lst,t1,SEEK_SET); //reposition and write out fwrite(&rec,sizeof(rec),1,lst); fseeko(lst,0,SEEK_SET); fwrite(&start,sizeof(off_t),1,lst); // next available byte printf(" **** new block created - added to block %lld\n",t2); return EXIT_SUCCESS; } } user@galway gcc list.c user@galway ./a add ------------------------> KEY12202611,8827 *added to block 104 add ------------------------> KEY75641989,6647 added to existing block 8 add ------------------------> KEY88037476,9034 added to existing block 8 add ------------------------> KEY31001589,2070 added to existing block 8 add ------------------------> KEY48938096,5973 added to existing block 8 add ------------------------> KEY58828456,7381 added to existing block 8 add ------------------------> KEY10368821,8099 added to existing block 8 add ------------------------> KEY74671703,5551 added to existing block 8 add ------------------------> KEY26963350,5696 added to existing block 8 add ------------------------> KEY45761874,1331 added to existing block 8 add ------------------------> KEY73077555,18 **** new block created - added to block 104 add ------------------------> KEY37762374,8644 added to existing block 104 add ------------------------> KEY39720990,3252 added to existing block 104 add ------------------------> KEY97993505,3556 added to existing block 104 add ------------------------> KEY31118981,6666 added to existing block 104 add ------------------------> KEY78876501,4915 added to existing block 104 add ------------------------> KEY83460319,5522 added to existing block 104 add ------------------------> KEY49619502,4889 added to existing block 104 add ------------------------> KEY60424275,1370 added to existing block 104 add ------------------------> KEY14120081,7967 added to existing block 104 add ------------------------> KEY92909914,8531 **** new block created - added to block 200 add ------------------------> KEY42378528,3983 added to existing block 200 add ------------------------> KEY95004912,9369 added to existing block 200 add ------------------------> KEY1205093,8935 added to existing block 200 add ------------------------> KEY51051657,1463 added to existing block 200 add ------------------------> KEY95629953,7286 added to existing block 200 add ------------------------> KEY76781674,2417 added to existing block 200 add ------------------------> KEY49991783,5831 added to existing block 200 add ------------------------> KEY70590066,7765 added to existing block 200 add ------------------------> KEY22958050,8121 added to existing block 200 add ------------------------> KEY34199541,7465 **** new block created - added to block 296 add ------------------------> KEY21738492,9321 added to existing block 296 add ------------------------> KEY78303560,118 added to existing block 296 add ------------------------> KEY25871614,8915 added to existing block 296 add ------------------------> KEY70470212,6113 added to existing block 296 add ------------------------> KEY33349205,7645 added to existing block 296 add ------------------------> KEY84747425,9344 added to existing block 296 add ------------------------> KEY42928837,2327 added to existing block 296 add ------------------------> KEY41776074,5899 added to existing block 296 add ------------------------> KEY90044443,1371 added to existing block 296 add ------------------------> KEY8228352,9863 **** new block created - added to block 392 add ------------------------> KEY16667928,5813 added to existing block 392 . . .

  2. Relational Algebra link