doit( A_INT, 123, A_STR, " hello, ", A_INT, 456, A_STR, " world\n", A_NULL ); return 0; } 7.39. Napishite neskol'ko funkcij dlya raboty s uproshchennoj bazoj dannyh. Zapis' v baze dannyh soderzhit klyuch - celoe, i stroku fiksirovannoj dliny: struct data { int b_key; /* klyuch */ char b_data[ DATALEN ]; /* informaciya */ }; Napishite: - dobavlenie zapisi - unichtozhenie po klyuchu - poisk po klyuchu (i pechat' stroki) - obnovlenie po klyuchu. Fajl organizovan kak nesortirovannyj massiv zapisej bez dublikatov (t.e. klyuchi ne mogut povtoryat'sya). Poisk proizvodit' linejno. Ispol'zujte funkcii fread, fwrite, fseek. Poslednyaya funkciya pozvolyaet vam pozicionirovat'sya k n-oj zapisi fajla: fseek( fp, (long) n * sizeof(struct data), 0 ); Perepishite etu programmu, ob®yaviv klyuch kak stroku, naprimer A. Bogatyrev, 1992-95 - 309 - Si v UNIX char b_key[ KEYLEN ]; Esli stroka-klyuch koroche KEYLEN simvolov, ona dolzhna okanchivat'sya '\0', inache - ispol'zuyutsya vse KEYLEN bukv i '\0' na konce otsutstvuet (tak zhe ustroeno pole d_name v katalogah fajlovoj sistemy). Usovershenstvujte algoritm dostupa, ispol'zuya heshiro- vanie po klyuchu (hash - peremeshivanie, sm. primer v prilozhenii). Vynesite klyuchi v otdel'nyj fajl. |tot fajl klyuchej sostoit iz struktur struct record_header { int b_key ; /* klyuch */ long b_offset; /* adres zapisi v fajle dannyh */ int b_length; /* dlina zapisi (neobyazatel'no) */ }; to est' organizovan analogichno nashej pervoj baze dannyh. Snachala vy ishchete nuzhnyj klyuch v fajle klyuchej. Pole b_offset u najdennogo klyucha zadaet adres dannogo v drugom fajle. CHtoby prochitat' ego, nado sdelat' fseek na rasstoyanie b_offset v fajle dannyh i prochest' b_length bajt. 7.40. Organizujte bazu dannyh v fajle kak spisok zapisej. V kazhdoj zapisi vmesto klyucha dolzhen hranit'sya nomer ocherednoj zapisi (ssylka). Napishite funkcii: poiska dan- nyh v spiske (po znacheniyu), dobavleniya dannyh v spisok v alfavitnom poryadke, (oni prosto pripisyvayutsya k koncu fajla, no v nuzhnyh mestah perestavlyayutsya ssylki), raspe- chatki spiska v poryadke ssylok, udaleniyu elementov iz spiska (iz samogo fajla oni ne udalyayutsya!). Ssylka (nomer) pervoj zapisi (golovy spiska) hranitsya v pervyh dvuh bajtah fajla, rassmatrivaemyh kak short. Vvedite optimizaciyu: napishite funkciyu dlya sortirovki fajla (prevrashcheniyu pereme- shannogo spiska v linejnyj) i vycherkivaniya iz nego udalennyh zapisej. Pri etom fajl budet perezapisan. Esli fajl otsortirovan, to poisk v nem mozhno proizvodit' bolee effektivno, chem proslezhivanie cepochki ssylok: prosto linejnym prosmotrom. Tretij bajt fajla ispol'zujte kak priznak: 1 - fajl byl otsortirovan, 0 - posle sortirovki v nego bylo chto-to dobavleno i linejnyj poryadok narushen. 7.41. Napishite funkciyu match(stroka,shablon); dlya proverki sootvetstviya stroki upro- shchennomu regulyarnomu vyrazheniyu v stile SHell. Metasimvoly shablona: * - lyuboe chislo lyubyh simvolov (0 i bolee); ? - odin lyuboj simvol. Uslozhnenie: [bukvy] - lyubaya iz perechislennyh bukv. [!bukvy] - lyubaya iz bukv, krome perechislennyh. [h-z] - lyubaya iz bukv ot h do z vklyuchitel'no. Ukazanie: dlya proverki "ostatka" stroki ispol'zujte rekursivnyj vyzov etoj zhe funk- cii. Ispol'zuya etu funkciyu, napishite programmu, kotoraya vydelyaet iz fajla SLOVA, udovletvoryayushchie zadannomu shablonu (naprimer, "[Ii]*o*t"). Imeetsya v vidu, chto kazhduyu stroku nado snachala razbit' na slova, a potom proverit' kazhdoe slovo. A. Bogatyrev, 1992-95 - 310 - Si v UNIX #include <stdio.h> #include <string.h> #include <locale.h> #define U(c) ((c) & 0377) /* podavlenie rasshireniya znaka */ #define QUOT '\\' /* ekraniruyushchij simvol */ #ifndef MATCH_ERR # define MATCH_ERR printf("Net ]\n") #endif /* s - sopostavlyaemaya stroka * p - shablon. Simvol \ otmenyaet specznachenie metasimvola. */ int match (register char *s, register char *p) { register int scc; /* tekushchij simvol stroki */ int c, cc, lc; /* lc - predydushchij simvol v [...] spiske */ int ok, notflag; for (;;) { scc = U(*s++); /* ocherednoj simvol stroki */ switch (c = U (*p++)) { /* ocherednoj simvol shablona */ case QUOT: /* a*\*b */ c = U (*p++); if( c == 0 ) return(0); /* oshibka: pattern\ */ else goto def; case '[': /* lyuboj simvol iz spiska */ ok = notflag = 0; lc = 077777; /* dostatochno bol'shoe chislo */ if(*p == '!'){ notflag=1; p++; } while (cc = U (*p++)) { if (cc == ']') { /* konec perechisleniya */ if (ok) break; /* sopostavilos' */ return (0); /* ne sopostavilos' */ } if (cc == '-') { /* interval simvolov */ if (notflag){ /* ne iz diapazona - OK */ if (!syinsy (lc, scc, U (*p++))) ok++; /* iz diapazona - neudacha */ else return (0); } else { /* simvol iz diapazona - OK */ if (syinsy (lc, scc, U (*p++))) ok++; } } else { if (cc == QUOT){ /* [\[\]] */ cc = U(*p++); if(!cc) return(0);/* oshibka */ } if (notflag){ if (scc && scc != (lc = cc)) ok++; /* ne vhodit v spisok */ else return (0); } else { A. Bogatyrev, 1992-95 - 311 - Si v UNIX if (scc == (lc = cc)) /* vhodit v spisok */ ok++; } } } if (cc == 0){ /* konec stroki */ MATCH_ERR; return (0); /* oshibka */ } continue; case '*': /* lyuboe chislo lyubyh simvolov */ if (!*p) return (1); for (s--; *s; s++) if (match (s, p)) return (1); return (0); case 0: return (scc == 0); default: def: if (c != scc) return (0); continue; case '?': /* odin lyuboj simvol */ if (scc == 0) return (0); continue; } } } /* Proverit', chto smy lezhit mezhdu smax i smin */ int syinsy (unsigned smin, unsigned smy, unsigned smax) { char left [2]; char right [2]; char middle [2]; left [0] = smin; left [1] = '\0'; right [0] = smax; right [1] = '\0'; middle[0] = smy; middle[1] = '\0'; return (strcoll(left, middle) <= 0 && strcoll(middle, right) <= 0); } Obratite vnimanie na to, chto v UNIX rasshireniem shablonov imen fajlov, vrode *.c, zanimaetsya ne operacionnaya sistema (kak v MS DOS), a programma-interpretator komand pol'zovatelya (shell: /bin/sh, /bin/csh, /bin/ksh). |to pozvolyaet obrabatyvat' (v principe) raznye stili shablonov imen. 7.42. Izuchite razdel rukovodstva man regexp i include-fajl /usr/include/regexp.h, soderzhashchij ishodnye teksty funkcij compile i step dlya regulyarnogo vyrazheniya v stile programm ed, lex, grep: odna bukva C ili zaekranirovannyj specsimvol \. \[ \* \$ \^ \\ oznachayut sami sebya; A. Bogatyrev, 1992-95 - 312 - Si v UNIX . oznachaet odin lyuboj simvol krome \n; [abc] ili [a-b] oznachaet lyuboj simvol iz perechislennyh (iz intervala); [abc-] minus v konce oznachaet sam simvol -; []abc] vnutri [] skobka ] na pervom meste oznachaet sama sebya; [^a-z] kryshka ^ oznachaet otricanie, t.e. lyuboj simvol krome perechislennyh; [a-z^] kryshka ne na pervom meste oznachaet sama sebya; [\*.] specsimvoly vnutri [] ne nesut special'nogo znacheniya, a predstavlyayut sami sebya; C* lyuboe (0 i bolee) chislo simvolov C; .* lyuboe chislo lyubyh simvolov; vyrazhenie* lyuboe chislo (0 i bolee) povtorenij vyrazheniya, naprimer [0-9]* oznachaet chislo (posledovatel'nost' cifr) ili pustoe mesto. Ishchetsya samoe dlinnoe prizhatoe vlevo podvyrazhenie; vyrazhenie\{n,m\} povtorenie vyrazheniya ot n do m raz (vklyuchitel'no), gde chisla ne prevoshodyat 255; vyrazhenie\{n,\} povtorenie po krajnej mere n raz, naprimer [0-9]\{1,\} oznachaet chislo; vyrazhenie\{n\} povtorenie rovno n raz; vyrazhenie$ stroka, chej konec udovletvoryaet vyrazheniyu, naprimer .*define.*\\$ ^vyrazhenie stroka, ch'e nachalo udovletvoryaet vyrazheniyu; \n simvol perevoda stroki; \(.....\) segment. Sopostavivshayasya s nim podstroka budet zapomnena; \N gde N cifra. Dannyj uchastok obrazca dolzhen sovpadat' s N-ym segmentom (numeraciya s 1). Napishite funkciyu matchReg, ispol'zuyushchuyu etot stil' regulyarnyh vyrazhenij. Sohranyajte shablon, pri vyzove matchReg sravnivajte staryj shablon s novym. Perekompilyaciyu sleduet proizvodit' tol'ko esli shablon izmenilsya: #include <stdio.h> #include <ctype.h> #define INIT register char *sp = instring; #define GETC() (*sp++) #define PEEKC() (*sp) #define UNGETC(c) (--sp) #define RETURN(ptr) return #define ERROR(code) \ {fprintf(stderr,"%s:ERR%d\n",instring,code);exit(177);} # include <regexp.h> #define EOL '\0' /* end of line */ #define ESIZE 512 int matchReg(char *str, char *pattern){ static char oldPattern[256]; static char compiledExpr[ESIZE]; if( strcmp(pattern, oldPattern)){ /* razlichny */ /* compile regular expression */ compile(pattern, compiledExpr, &compiledExpr[ESIZE], EOL); A. Bogatyrev, 1992-95 - 313 - Si v UNIX strcpy(oldPattern, pattern); /* zapomnit' */ } return step(str, compiledExpr); /* sopostavit' */ } /* Primer vyzova: reg '^int' 'int$' char | less */ /* reg 'putchar.*(.*)' < reg.c | more */ void main(int ac, char **av){ char inputline[BUFSIZ]; register i; while(gets(inputline)){ for(i=1; i < ac; i++) if(matchReg(inputline, av[i])){ char *p; extern char *loc1, *loc2; /*printf("%s\n", inputline);*/ /* Napechatat' stroku, * vydelyaya sopostavivshuyusya chast' zhirno */ for(p=inputline; p != loc1; p++) putchar(*p); for( ; p != loc2; p++) if(isspace((unsigned char) *p)) putchar(*p); else printf("%c\b%c", *p, *p); for( ; *p; p++) putchar(*p); putchar('\n'); break; } } } 7.43. Ispol'zuya <regexp.h> napishite programmu, proizvodyashchuyu kontekstnuyu zamenu vo vseh strokah fajla. Esli stroka ne udovletvoryaet regulyarnomu vyrazheniyu - ona ostaetsya neizmennoj. Primery vyzova: $ regsub '\([0-9]\{1,\}\)' '(\1)' $ regsub 'f(\(.*\),\(.*\))' 'f(\2,\1)' < file Vtoraya komanda dolzhna zamenyat' vse vhozhdeniya f(a,b) na f(b,a). Vyrazhenie, oboznachen- noe v obrazce kak \(...\), podstavlyaetsya na mesto sootvetstvuyushchej konstrukcii \N vo vtorom argumente, gde N - cifra, nomer segmenta. CHtoby pomestit' v vyhod sam simvol \, ego nado udvaivat': \\. A. Bogatyrev, 1992-95 - 314 - Si v UNIX /* Kontekstnaya zamena */ #include <stdio.h> #include <ctype.h> #define INIT register char *sp = instring; #define GETC() (*sp++) #define PEEKC() (*sp) #define UNGETC(c) (--sp) #define RETURN(ptr) return #define ERROR(code) regerr(code) void regerr(); # include <regexp.h> #define EOL '\0' /* end of line */ #define ESIZE 512 short all = 0; /* klyuch -a oznachaet, chto v stroke nado zamenit' VSE vhozhdeniya obrazca (global, all): * regsub -a int INT * "aa int bbb int cccc" -> "aa INT bbb INT cccc" * * step() nahodit SAMUYU DLINNUYU podstroku, udovletvoryayushchuyu vyrazheniyu, * poetomu regsub 'f(\(.*\),\(.*\))' 'f(\2,\1)' * zamenit "aa f(1,2) bb f(3,4) cc" -> "aa f(4,1,2) bb f(3) cc' * |___________|_| |_|___________| */ char compiled[ESIZE], line[512]; A. Bogatyrev, 1992-95 - 315 - Si v UNIX void main(int ac, char *av[]){ register char *s, *p; register n; extern int nbra; extern char *braslist[], *braelist[], *loc1, *loc2; if( ac > 1 && !strcmp(av[1], "-a")){ ac--; av++; all++; } if(ac != 3){ fprintf(stderr, "Usage: %s [-a] pattern subst\n", av[0]); exit(1); } compile(av[1], compiled, compiled + sizeof compiled, EOL); while( gets(line) != NULL ){ if( !step(s = line, compiled)){ printf("%s\n", line); continue; } do{ /* Pechataem nachalo stroki */ for( ; s != loc1; s++) putchar(*s); /* Delaem zamenu */ for(s=av[2]; *s; s++) if(*s == '\\'){ if(isdigit(s[1])){ /* segment */ int num = *++s - '1'; if(num < 0 || num >= nbra){ fprintf(stderr, "Bad block number %d\n", num+1); exit(2); } for(p=braslist[num]; p != braelist[num]; ++p) putchar(*p); } else if(s[1] == '&'){ ++s; /* vsya sopostavlennaya stroka */ for(p=loc1; p != loc2; ++p) putchar(*p); } else putchar(*++s); } else putchar(*s); } while(all && step(s = loc2, compiled)); /* Ostatok stroki */ for(s=loc2; *s; s++) putchar(*s); putchar('\n'); } /* endwhile */ } A. Bogatyrev, 1992-95 - 316 - Si v UNIX void regerr(int code){ char *msg; switch(code){ case 11: msg = "Range endpoint too large."; break; case 16: msg = "Bad number."; break; case 25: msg = "\\digit out of range."; break; case 36: msg = "Illegal or missing delimiter."; break; case 41: msg = "No remembered search string."; break; case 42: msg = "\\(~\\) imbalance."; break; case 43: msg = "Too many \\(."; break; case 44: msg = "More than 2 numbers given in \\{~\\\"}."; break; case 45: msg = "} expected after \\."; break; case 46: msg = "First number exceeds second in \\{~\\}."; break; case 49: msg = "[ ] imbalance."; break; case 50: msg = "Regular expression overflow."; break; default: msg = "Unknown error"; break; } fputs(msg, stderr); fputc('\n', stderr); exit(code); } void prfields(){ int i; for(i=0; i < nbra; i++) prfield(i); } void prfield(int n){ char *fbeg = braslist[n], *fend = braelist[n]; printf("\\%d='", n+1); for(; fbeg != fend; fbeg++) putchar(*fbeg); printf("'\n"); } 7.44. Sostav'te funkciyu poiska podstroki v stroke. Ispol'zuya ee, napishite programmu poiska podstroki v tekstovom fajle. Programma dolzhna vyvodit' stroki (libo nomera strok) fajla, v kotoryh vstretilas' dannaya podstroka. Podstroka zadaetsya v kachestve argumenta funkcii main(). /* Algoritm bystrogo poiska podstroki. * Dzh. Mur, R. Bojer, 1976 Texas * Smotri: Communications of the ACM 20, 10 (Oct., 1977), 762-772 * * |tot algoritm vygoden pri mnogokratnom poiske obrazca v * bol'shom kolichestve strok, prichem esli oni ravnoj dliny - * mozhno sekonomit' eshche i na operacii strlen(str). * Algoritm harakteren tem, chto pri neudache proizvodit sdvig ne na * odin, a srazu na neskol'ko simvolov vpravo. * V luchshem sluchae algoritm delaet slen/plen sravnenij. */ char *pattern; /* obrazec (chto iskat') */ static int plen; /* dlina obrazca */ static int d[256]; /* tablica sdvigov; v alfavite ASCII - * 256 bukv. */ /* rasstoyanie ot konca obrazca do pozicii i v nem */ #define DISTANCE(i) ((plen-1) - (i)) A. Bogatyrev, 1992-95 - 317 - Si v UNIX /* Poisk: * vydat' indeks vhozhdeniya pattern v str, * libo -1, esli ne vhodit */ int indexBM( str ) char *str; /* v chem iskat' */ { int slen = strlen(str); /* dlina stroki */ register int pindx; /* indeks sravnivaemoj bukvy v obrazce */ register int cmppos; /* indeks sravnivaemoj bukvy v stroke */ register int endpos; /* poziciya v stroke, k kotoroj "pristavlyaetsya" * poslednyaya bukva obrazca */ /* poka obrazec pomeshchaetsya v ostatok stroki */ for( endpos = plen-1; endpos < slen ; ){ /* Dlya otladki: pr(str, pattern, endpos - (plen-1), 0); /**/ /* prosmotr obrazca ot konca k nachalu */ for( cmppos = endpos, pindx = (plen - 1); pindx >= 0 ; cmppos--, pindx-- ) if( str[cmppos] != pattern[pindx] ){ /* Sdvig, kotoryj stavit samyj pravyj v obrazce * simvol str[endpos] kak raz pod endpos-tuyu * poziciyu stroki. Esli zhe takoj simvol v obrazce ne * soderzhitsya (ili soderzhitsya tol'ko na konce), * to nachalo obrazca ustanavlivaetsya v endpos+1 uyu * poziciyu */ endpos += d[ str[endpos] & 0377 ]; break; /* & 0377 podavlyaet rasshirenie znaka. Eshche */ } /* mozhno sdelat' vse char -> unsigned char */ if( pindx < 0 ) return ( endpos - (plen-1)); /* Nashel: ves' obrazec vlozhilsya */ } return( -1 ); /* Ne najdeno */ } A. Bogatyrev, 1992-95 - 318 - Si v UNIX /* Razmetka tablicy sdvigov */ void compilePatternBM( ptrn ) char *ptrn; { register int c; pattern = ptrn; plen = strlen(ptrn); /* c - nomer bukvy alfavita */ for(c = 0; c < 256; c++) d[c] = plen; /* sdvig na dlinu vsego obrazca */ /* c - poziciya v obrazce */ for(c = 0; c < plen - 1; c++) d[ pattern[c] & 0377 ] = DISTANCE(c); /* Sdvig raven rasstoyaniyu ot samogo pravogo * (krome poslednej bukvy obrazca) * vhozhdeniya bukvy v obrazec do konca obrazca. * Zametim, chto esli bukva vhodit v obrazec neskol'ko raz, * to cikl uchityvaet poslednee (samoe pravoe) vhozhdenie. */ } /* Pechat' najdennyh strok */ void pr(s, p, n, nl) char *s, *p; { register i; printf("%4d\t%s\n", nl, s ); printf(" \t"); for(i = 0; i < n; i++ ) putchar( s[i] == '\t' ? '\t' : ' ' ); printf( "%s\n", p ); } /* Analog programmy fgrep */ #include <stdio.h> char str[ 1024 ]; /* bufer dlya prochitannoj stroki */ void main(ac, av) char **av; { int nline = 0; /* nomer stroki fajla */ int ind; int retcode = 1; if(ac != 2){ fprintf(stderr, "Usage: %s 'pattern'\n", av[0] ); exit(33); } compilePatternBM( av[1] ); while( gets(str) != NULL ){ nline++; if((ind = indexBM(str)) >= 0 ){ retcode = 0; /* O'KAY */ pr(str, pattern, ind, nline); } } exit(retcode); } A. Bogatyrev, 1992-95 - 319 - Si v UNIX /* Primer raboty algoritma: peter piper picked a peck of pickled peppers. peck peter piper picked a peck of pickled peppers. peck peter piper picked a peck of pickled peppers. peck peter piper picked a peck of pickled peppers. peck peter piper picked a peck of pickled peppers. peck peter piper picked a peck of pickled peppers. peck peter piper picked a peck of pickled peppers. peck peter piper picked a peck of pickled peppers. peck */ 7.45. Napishite analogichnuyu programmu, vydayushchuyu vse stroki, udovletvoryayushchie uproshchen- nomu regulyarnomu vyrazheniyu, zadavaemomu kak argument dlya main(). Ispol'zujte funkciyu match, napisannuyu nami ranee. Vy napisali analog programmy grep iz UNIX (no s drugim tipom regulyarnogo vyrazheniya, nezheli v originale). 7.46. Sostav'te funkciyu expand(s1, s2), kotoraya rasshiryaet sokrashchennye oboznacheniya vida a-z stroki s1 v ekvivalentnyj polnyj spisok abcd...xyz v stroke s2. Dopuskayutsya sokrashcheniya dlya strochnyh i propisnyh bukv i cifr. Uchtite sluchai tipa a-b-c, a-z0-9 i -a-g (soglashenie sostoit v tom, chto simvol "-", stoyashchij v nachale ili v konce, vospri- nimaetsya bukval'no). 7.47. Napishite programmu, chitayushchuyu fajl i zamenyayushchuyu stroki vida |<1 i bolee probelov i tabulyacij><tekst> na pary strok |.pp |<tekst> (zdes' | oboznachaet levyj kraj fajla, a <> - metasimvoly). |to - prostejshij prepro- cessor, gotovyashchij tekst v formate nroff (eto formatter tekstov v UNIX). Uslozhneniya: - stroki, nachinayushchiesya s tochki ili s apostrofa, zamenyat' na \&<tekst, nachinayushchijsya s tochki ili '> - stroki, nachinayushchiesya s cifry, zamenyat' na .ip <chislo> <tekst> - simvol \ zamenyat' na posledovatel'nost' \e. - udalyat' probely pered simvolami .,;:!?) i vstavlyat' posle nih probel (znak pre- pinaniya dolzhen byt' prikleen k koncu slova, inache on mozhet byt' perenesen na sleduyushchuyu stroku. Vy kogda-nibud' videli stroku, nachinayushchuyusya s zapyatoj?). - skleivat' perenesennye slova, poskol'ku nroff delaet perenosy sam: ....xxxx nachalo- => ....xxxx nachalokonec konec yyyy...... yyyy................ A. Bogatyrev, 1992-95 - 320 - Si v UNIX Vyzyvajte etot preprocessor razmetki teksta tak: $ prep fajly... | nroff -me > text.lp 7.48. Sostav'te programmu preobrazovaniya propisnyh bukv iz fajla vvoda v strochnye, ispol'zuya pri etom funkciyu, v kotoroj neobhodimo organizovat' analiz simvola (dejst- vitel'no li eto bukva). Strochnye bukvy vydavat' bez izmeneniya. Ukazanie: ispol'zujte makrosy iz <ctype.h>. Otvet: #include <ctype.h> #include <stdio.h> main(){ int c; while( (c = getchar()) != EOF ) putchar( isalpha( c ) ? (isupper( c ) ? tolower( c ) : c) : c); } libo ... putchar( isalpha(c) && isupper(c) ? tolower(c) : c ); libo dazhe putchar( isupper(c) ? tolower(c) : c ); V poslednem sluchae pod isupper i islower dolzhny ponimat'sya tol'ko bukvy (uvy, ne vo vseh realizaciyah eto tak!). 7.49. Obratite vnimanie, chto esli my vydelyaem klass simvolov pri pomoshchi sravneniya, naprimer: char ch; if( 0300 <= ch && ch < 0340 ) ...; (v kodirovke KOI-8 eto malen'kie russkie bukvy), to my mozhem natolknut'sya na sleduyu- shchij syurpriz: pered sravneniem s celym znachenie ch privoditsya k tipu int (privedenie takzhe delaetsya pri ispol'zovanii char v kachestve argumenta funkcii). Pri etom, esli u ch byl ustanovlen starshij bit (0200), proizojdet rasshirenie ego vo ves' starshij bajt (rasshirenie znakovogo bita). Rezul'tatom budet otricatel'noe celoe chislo! Opyt: char c = '\201'; /* = 129 */ printf( "%d\n", c ); pechataetsya -127. Takim obrazom, nashe sravnenie ne srabotaet, t.k. okazyvaetsya chto ch < 0. Sleduet podavlyat' rasshirenie znaka: if( 0300 <= (ch & 0377) && (ch & 0377) < 0340) ...; (0377 - maska iz 8 bit, ona zhe 0xFF, ves' bajt), libo ob®yavit' unsigned char ch; chto oznachaet, chto pri privedenii k int znakovyj bit ne rasshiryaetsya. 7.50. Rassmotrim eshche odin primer: A. Bogatyrev, 1992-95 - 321 - Si v UNIX main(){ char ch; /* 0377 - kod poslednego simvola alfavita ASCII */ for (ch = 0100; ch <= 0377; ch++ ) printf( "%03o %s\n", ch & 0377, ch >= 0300 && ch < 0340 ? "yes" : "no" ); } Kakie nepriyatnosti zhdut nas zdes'? - vo-pervyh, kogda bit 0200 u ch ustanovlen, v sravnenii ch vystupaet kak otrica- tel'noe celoe chislo (t.k. privedenie k int delaetsya rasshireniem znakovogo bita), to est' u nas vsegda pechataetsya "no". |to my mozhem ispravit', napisav unsigned char ch, libo ispol'zuya ch v vide (ch & 0377) ili ((unsigned) ch) - vo-vtoryh, rassmotrim sam cikl. Pust' sejchas ch =='\377'. Uslovie ch <= 0377 istinno. Vypolnyaetsya operator ch++. No ch - eto bajt, poetomu operacii nad nim proizvodyatsya po modulyu 0400 (0377 - eto maksimal'noe znachenie, kotoroe mozhno hranit' v bajte - vse bity edinicy). To est' teper' znacheniem ch stanet 0. No 0 < 0377 i uslovie cikla verno! Cikl prodolzhaetsya; t.e. proishodit zacikliva- nie. Izbezhat' etogo mozhno tol'ko opisav int ch; chtoby 0377+1 bylo ravno 0400, a ne 0 (ili unsigned int, lish' by dliny peremennoj hvatalo, chtoby vmestit' chislo bol'she 0377). 7.51. Sostav'te programmu, preobrazuyushchuyu tekst, sostoyashchij tol'ko iz strochnyh bukv v tekst, sostoyashchij iz propisnyh i strochnyh bukv. Pervaya bukva i bukva posle kazhdoj tochki - propisnye, ostal'nye - strochnye. slovo odin. slovo dva. --> Slovo odin. Slovo dva. |ta programma mozhet okazat'sya poleznoj dlya preobrazovaniya teksta, nabrannogo v odnom registre, v tekst, soderzhashchij bukvy oboih registrov. 7.52. Napishite programmu, ispravlyayushchuyu opechatki v slovah (spell check): programme zadan spisok slov; ona proveryaet - yavlyaetsya li vvedennoe vami slovo slovom iz spiska. Esli net - pytaetsya najti naibolee pohozhee slovo iz spiska, prichem esli est' nes- kol'ko pohozhih - vydaet vse varianty. Otlavlivajte sluchai: - dve sosednie bukvy perestavleny mestami: nozhincy=>nozhnicy; - udvoennaya bukva (bukvy): kkarrandash=>karandash; - poteryana bukva: bot=>bolt; - izmenennaya bukva: bint=>bant; - lishnyaya bukva: morda=>moda; - bukvy ne v tom registre - sravnite s kazhdym slovom iz spiska, privodya vse bukvy k malen'kim: sOVOk=>sovok; Nado proveryat' kazhduyu bukvu slova. Vozmozhno vam budet udobno ispol'zovat' rekursiyu. Podskazka: dlya nekotoryh proverok vam mozhet pomoch' funkciya match: slovo_tablicy = "dom"; if(strlen(vhodnoe_slovo) <= strlen(slovo_tablicy)+1 && match(vhodnoe_slovo, "*d*o*m*") ... /* pohozhe */ *o*m* ?dom dom? *d*m* d?om *d*o* do?m Privedem variant resheniya etoj zadachi: A. Bogatyrev, 1992-95 - 322 - Si v UNIX #include <stdio.h> #include <ctype.h> #include <locale.h> typedef unsigned char uchar; #define ANYCHAR '*' /* simvol, sopostavlyayushchijsya s odnoj lyuboj bukvoj */ static uchar version[120]; /* bufer dlya generacii variantov */ static uchar vv; /* bukva, sopostavivshayasya s ANYCHAR */ /* privesti vse bukvy k odnomu registru */ static uchar icase(uchar c){ return isupper(c) ? tolower(c) : c; } /* sravnenie strok s ignorirovaniem registra */ static int eqi(uchar *s1, uchar *s2 ) { while( *s1 && *s2 ){ if( icase( *s1 ) != icase( *s2 )) break; s1++; s2++; } return ( ! *s1 && ! *s2 ) ? 1 : 0 ; /* OK : FAIL */ } /* sravnenie strok s ignorirovaniem ANYCHAR */ static strok(register uchar *word, register uchar *pat) { while( *word && *pat ){ if( *word == ANYCHAR){ /* Nevazhno, chto est' *pat, no zapomnim */ vv= *pat; } else { if( icase(*pat) != icase(*word) ) break; } word++; pat++; } /* esli slova konchilis' odnovremenno ... */ return ( !*word && !*pat) ? 1 : 0; /* OK : FAIL */ } A. Bogatyrev, 1992-95 - 323 - Si v UNIX /* LISHNYAYA BUKVA */ static int superfluous( uchar *word /* slovo dlya korrekcii */ , uchar *s /* etalon */ ){ register int i,j,k; int reply; register len = strlen(word); for(i=0 ; i < len ; i++){ /* generim slova , poluchayushchiesya udaleniem odnoj bukvy */ k=0; for(j=0 ; j < i ; j++) version[k++]=word[j]; for(j=i+1 ; j < len ; j++) version[k++]=word[j]; version[k]='\0'; if( eqi( version, s )) return 1; /* OK */ } return 0; /* FAIL */ } /* POTERYANA BUKVA */ static int hole; /* mesto, gde vstavlena ANYCHAR */ static int lost(uchar *word, uchar *s) { register int i,j,k; register len = strlen(word); hole= (-1); for(i=0 ; i < len+1 ; i++){ k=0; for(j=0 ; j < i ; j++) version[k++]=word[j]; version[k++]=ANYCHAR; for(j=i ; j < len ; j++) version[k++]=word[j]; version[k]='\0'; if( strok( version, s )){ hole=i; return 1; /* OK */ } } return 0; /* FAIL */ } A. Bogatyrev, 1992-95 - 324 - Si v UNIX /* IZMENILASX ODNA BUKVA (vklyuchaet sluchaj oshibki registra) */ static int changed(uchar *word, uchar *s) { register int i,j,k; register len = strlen(word); hole = (-1); for(i=0 ; i < len ; i++){ k=0; for( j=0 ; j < i ; j++) version[k++]=word[j]; version[k++]=ANYCHAR; for( j=i+1 ; j < len ; j++) version[k++]=word[j]; version[k]='\0'; if( strok( version,s)){ hole=i; return 1; /* OK */ } } return 0; /* FAIL */ } /* UDVOENNAYA BUKVA */ static int duplicates(uchar *word, uchar *s, int leng) { register int i,j,k; uchar tmp[80]; if( eqi( word, s )) return 1; /* OK */ for(i=0;i < leng - 1; i++) /* ishchem parnye bukvy */ if( word[i]==word[i+1]){ k=0; for(j=0 ; j < i ; j++) tmp[k++]=word[j]; for(j=i+1 ; j < leng ; j++) tmp[k++]=word[j]; tmp[k]='\0'; if( duplicates( tmp, s, leng-1) == 1) return 1; /* OK */ } return 0; /* FAIL */ } A. Bogatyrev, 1992-95 - 325 - Si v UNIX /* PERESTAVLENY SOSEDNIE BUKVY */ static int swapped(uchar *word, uchar *s) { register int i,j,k; register len = strlen(word); for(i=0;i < len-1;i++){ k=0; for(j=0 ; j < i ; j++) version[k++]=word[j]; version[k++]=word[i+1]; version[k++]=word[i]; for(j=i+2 ; j < len ; j++) version[k++]=word[j]; version[k]='\0'; if( eqi( version, s)) return 1; /* OK */ } return 0; /* FAIL */ } uchar *words[] = { (uchar *) "bag", (uchar *) "bags", (uchar *) "cook", (uchar *) "cool", (uchar *) "bug", (uchar *) "buy", (uchar *) "cock", NULL }; #define Bcase(x, operators) case x: { operators; } break; char *cname[5] = { "perestavleny bukvy", "udvoeny bukvy ", "poteryana bukva ", "oshibochnaya bukva ", "lishnyaya bukva " }; A. Bogatyrev, 1992-95 - 326 - Si v UNIX static int spellmatch( uchar *word /* IN slovo dlya korrekcii */ , uchar *words[] /* IN tablica dopustimyh slov */ , uchar **indx /* OUT otvet */ ){ int i, code, total = (-1); uchar **ptr; if(!*word) return -1; for(ptr = words; *ptr; ++ptr) if(eqi(word, *ptr)){ if(indx) *indx = *ptr; return 0; } /* Net v tablice, nuzhen podbor pohozhih */ for(ptr = words; *ptr; ++ptr){ uchar *s = *ptr; int max = 5; for(i=0; i < max; i++){ switch( i ){ Bcase(0,code = swapped(word, s) ) Bcase(1,code = duplicates(word, s, strlen(word)) ) Bcase(2,code = lost(word, s) ) Bcase(3,code = changed(word, s) ) Bcase(4,code = superfluous(word, s) ) } if(code){ total++; printf("?\t%s\t%s\n", cname[i], s); if(indx) *indx = s; /* V sluchae s dublikatami ne rassmatrivat' * na nalichie lishnih bukv */ if(i==1) max = 4; } } } return total; } A. Bogatyrev, 1992-95 - 327 - Si v UNIX void main(){ uchar inpbuf[BUFSIZ]; int n; uchar *reply, **ptr; setlocale(LC_ALL, ""); for(ptr = words; *ptr; ptr++) printf("#\t%s\n", *ptr); do{ printf("> "); fflush(stdout); if(gets((char *)inpbuf) == NULL) break; switch(spellmatch(inpbuf, words, &reply)){ case -1: printf("Net takogo slova\n"); break; case 0: printf("Slovo '%s'\n", reply); break; default: printf("Neodnoznachno\n"); } } while(1); } 7.53. Poka ya sam pisal etu programmu, ya sdelal dve oshibki, kotorye dolzhny byt' ves'ma harakterny dlya novichkov. Pro nih nado by govorit' ran'she, v glave pro stroki i v samoj pervoj glave, no tut oni prishlis' kak raz k mestu. Vopros: chto pechataet sle- duyushchaya programma? #include <stdio.h> char *strings[] = { "Pervaya stroka" "Vtoraya stroka" "Tretyaya stroka", "CHetvertaya stroka", NULL }; void main(){ char **p; for(p=strings;*p;++p) printf("%s\n", *p); } A pechataet ona vot chto: Pervaya strokaVtoraya strokaTretyaya stroka CHetvertaya stroka Delo v tom, chto ANSI kompilyator Si skleivaet stroki: "nachalo stroki" "i ee konec" esli oni razdeleny probelami v smysle isspace, v tom chisle i pustymi strokami. A v nashem ob®yavlenii massiva strok strings my pot