doit(
A_INT, 123,
A_STR, " hello, ",
A_INT, 456,
A_STR, " world\n",
A_NULL
);
return 0;
}
7.39. Napishite neskol'ko funkcij dlya raboty s uproshchennoj bazoj dannyh. Zapis' v baze
dannyh soderzhit klyuch - celoe, i stroku fiksirovannoj dliny:
struct data {
int b_key; /* klyuch */
char b_data[ DATALEN ]; /* informaciya */
};
Napishite:
- dobavlenie zapisi
- unichtozhenie po klyuchu
- poisk po klyuchu (i pechat' stroki)
- obnovlenie po klyuchu.
Fajl organizovan kak nesortirovannyj massiv zapisej bez dublikatov (t.e. klyuchi ne
mogut povtoryat'sya). Poisk proizvodit' linejno. Ispol'zujte funkcii fread, fwrite,
fseek. Poslednyaya funkciya pozvolyaet vam pozicionirovat'sya k n-oj zapisi fajla:
fseek( fp, (long) n * sizeof(struct data), 0 );
Perepishite etu programmu, ob®yaviv klyuch kak stroku, naprimer
A. Bogatyrev, 1992-95 - 309 - Si v UNIX
char b_key[ KEYLEN ];
Esli stroka-klyuch koroche KEYLEN simvolov, ona dolzhna okanchivat'sya '\0', inache -
ispol'zuyutsya vse KEYLEN bukv i '\0' na konce otsutstvuet (tak zhe ustroeno pole d_name
v katalogah fajlovoj sistemy). Usovershenstvujte algoritm dostupa, ispol'zuya heshiro-
vanie po klyuchu (hash - peremeshivanie, sm. primer v prilozhenii). Vynesite klyuchi v
otdel'nyj fajl. |tot fajl klyuchej sostoit iz struktur
struct record_header {
int b_key ; /* klyuch */
long b_offset; /* adres zapisi v fajle dannyh */
int b_length; /* dlina zapisi (neobyazatel'no) */
};
to est' organizovan analogichno nashej pervoj baze dannyh. Snachala vy ishchete nuzhnyj
klyuch v fajle klyuchej. Pole b_offset u najdennogo klyucha zadaet adres dannogo v drugom
fajle. CHtoby prochitat' ego, nado sdelat' fseek na rasstoyanie b_offset v fajle dannyh
i prochest' b_length bajt.
7.40. Organizujte bazu dannyh v fajle kak spisok zapisej. V kazhdoj zapisi vmesto
klyucha dolzhen hranit'sya nomer ocherednoj zapisi (ssylka). Napishite funkcii: poiska dan-
nyh v spiske (po znacheniyu), dobavleniya dannyh v spisok v alfavitnom poryadke, (oni
prosto pripisyvayutsya k koncu fajla, no v nuzhnyh mestah perestavlyayutsya ssylki), raspe-
chatki spiska v poryadke ssylok, udaleniyu elementov iz spiska (iz samogo fajla oni ne
udalyayutsya!). Ssylka (nomer) pervoj zapisi (golovy spiska) hranitsya v pervyh dvuh
bajtah fajla, rassmatrivaemyh kak short.
Vvedite optimizaciyu: napishite funkciyu dlya sortirovki fajla (prevrashcheniyu pereme-
shannogo spiska v linejnyj) i vycherkivaniya iz nego udalennyh zapisej. Pri etom fajl
budet perezapisan. Esli fajl otsortirovan, to poisk v nem mozhno proizvodit' bolee
effektivno, chem proslezhivanie cepochki ssylok: prosto linejnym prosmotrom. Tretij
bajt fajla ispol'zujte kak priznak: 1 - fajl byl otsortirovan, 0 - posle sortirovki v
nego bylo chto-to dobavleno i linejnyj poryadok narushen.
7.41. Napishite funkciyu match(stroka,shablon); dlya proverki sootvetstviya stroki upro-
shchennomu regulyarnomu vyrazheniyu v stile SHell. Metasimvoly shablona:
* - lyuboe chislo lyubyh simvolov (0 i bolee);
? - odin lyuboj simvol.
Uslozhnenie:
[bukvy] - lyubaya iz perechislennyh bukv.
[!bukvy] - lyubaya iz bukv, krome perechislennyh.
[h-z] - lyubaya iz bukv ot h do z vklyuchitel'no.
Ukazanie: dlya proverki "ostatka" stroki ispol'zujte rekursivnyj vyzov etoj zhe funk-
cii.
Ispol'zuya etu funkciyu, napishite programmu, kotoraya vydelyaet iz fajla SLOVA,
udovletvoryayushchie zadannomu shablonu (naprimer, "[Ii]*o*t"). Imeetsya v vidu, chto kazhduyu
stroku nado snachala razbit' na slova, a potom proverit' kazhdoe slovo.
A. Bogatyrev, 1992-95 - 310 - Si v UNIX
#include <stdio.h>
#include <string.h>
#include <locale.h>
#define U(c) ((c) & 0377) /* podavlenie rasshireniya znaka */
#define QUOT '\\' /* ekraniruyushchij simvol */
#ifndef MATCH_ERR
# define MATCH_ERR printf("Net ]\n")
#endif
/* s - sopostavlyaemaya stroka
* p - shablon. Simvol \ otmenyaet specznachenie metasimvola.
*/
int match (register char *s, register char *p)
{
register int scc; /* tekushchij simvol stroki */
int c, cc, lc; /* lc - predydushchij simvol v [...] spiske */
int ok, notflag;
for (;;) {
scc = U(*s++); /* ocherednoj simvol stroki */
switch (c = U (*p++)) { /* ocherednoj simvol shablona */
case QUOT: /* a*\*b */
c = U (*p++);
if( c == 0 ) return(0); /* oshibka: pattern\ */
else goto def;
case '[': /* lyuboj simvol iz spiska */
ok = notflag = 0;
lc = 077777; /* dostatochno bol'shoe chislo */
if(*p == '!'){ notflag=1; p++; }
while (cc = U (*p++)) {
if (cc == ']') { /* konec perechisleniya */
if (ok)
break; /* sopostavilos' */
return (0); /* ne sopostavilos' */
}
if (cc == '-') { /* interval simvolov */
if (notflag){
/* ne iz diapazona - OK */
if (!syinsy (lc, scc, U (*p++)))
ok++;
/* iz diapazona - neudacha */
else return (0);
} else {
/* simvol iz diapazona - OK */
if (syinsy (lc, scc, U (*p++)))
ok++;
}
}
else {
if (cc == QUOT){ /* [\[\]] */
cc = U(*p++);
if(!cc) return(0);/* oshibka */
}
if (notflag){
if (scc && scc != (lc = cc))
ok++; /* ne vhodit v spisok */
else return (0);
} else {
A. Bogatyrev, 1992-95 - 311 - Si v UNIX
if (scc == (lc = cc)) /* vhodit v spisok */
ok++;
}
}
}
if (cc == 0){ /* konec stroki */
MATCH_ERR;
return (0); /* oshibka */
}
continue;
case '*': /* lyuboe chislo lyubyh simvolov */
if (!*p)
return (1);
for (s--; *s; s++)
if (match (s, p))
return (1);
return (0);
case 0:
return (scc == 0);
default: def:
if (c != scc)
return (0);
continue;
case '?': /* odin lyuboj simvol */
if (scc == 0)
return (0);
continue;
}
}
}
/* Proverit', chto smy lezhit mezhdu smax i smin
*/
int syinsy (unsigned smin, unsigned smy, unsigned smax)
{
char left [2];
char right [2];
char middle [2];
left [0] = smin; left [1] = '\0';
right [0] = smax; right [1] = '\0';
middle[0] = smy; middle[1] = '\0';
return (strcoll(left, middle) <= 0 && strcoll(middle, right) <= 0);
}
Obratite vnimanie na to, chto v UNIX rasshireniem shablonov imen fajlov, vrode *.c,
zanimaetsya ne operacionnaya sistema (kak v MS DOS), a programma-interpretator komand
pol'zovatelya (shell: /bin/sh, /bin/csh, /bin/ksh). |to pozvolyaet obrabatyvat' (v
principe) raznye stili shablonov imen.
7.42. Izuchite razdel rukovodstva man regexp i include-fajl /usr/include/regexp.h,
soderzhashchij ishodnye teksty funkcij compile i step dlya regulyarnogo vyrazheniya v stile
programm ed, lex, grep:
odna bukva C
ili zaekranirovannyj specsimvol \. \[ \* \$ \^ \\ oznachayut sami sebya;
A. Bogatyrev, 1992-95 - 312 - Si v UNIX
. oznachaet odin lyuboj simvol krome \n;
[abc]
ili [a-b] oznachaet lyuboj simvol iz perechislennyh (iz intervala);
[abc-]
minus v konce oznachaet sam simvol -;
[]abc]
vnutri [] skobka ] na pervom meste oznachaet sama sebya;
[^a-z]
kryshka ^ oznachaet otricanie, t.e. lyuboj simvol krome perechislennyh;
[a-z^]
kryshka ne na pervom meste oznachaet sama sebya;
[\*.]
specsimvoly vnutri [] ne nesut special'nogo znacheniya, a predstavlyayut sami sebya;
C* lyuboe (0 i bolee) chislo simvolov C;
.* lyuboe chislo lyubyh simvolov;
vyrazhenie*
lyuboe chislo (0 i bolee) povtorenij vyrazheniya, naprimer [0-9]* oznachaet chislo
(posledovatel'nost' cifr) ili pustoe mesto. Ishchetsya samoe dlinnoe prizhatoe vlevo
podvyrazhenie;
vyrazhenie\{n,m\}
povtorenie vyrazheniya ot n do m raz (vklyuchitel'no), gde chisla ne prevoshodyat 255;
vyrazhenie\{n,\}
povtorenie po krajnej mere n raz, naprimer [0-9]\{1,\} oznachaet chislo;
vyrazhenie\{n\}
povtorenie rovno n raz;
vyrazhenie$
stroka, chej konec udovletvoryaet vyrazheniyu, naprimer .*define.*\\$
^vyrazhenie
stroka, ch'e nachalo udovletvoryaet vyrazheniyu;
\n simvol perevoda stroki;
\(.....\)
segment. Sopostavivshayasya s nim podstroka budet zapomnena;
\N gde N cifra. Dannyj uchastok obrazca dolzhen sovpadat' s N-ym segmentom (numeraciya
s 1).
Napishite funkciyu matchReg, ispol'zuyushchuyu etot stil' regulyarnyh vyrazhenij. Sohranyajte
shablon, pri vyzove matchReg sravnivajte staryj shablon s novym. Perekompilyaciyu sleduet
proizvodit' tol'ko esli shablon izmenilsya:
#include <stdio.h>
#include <ctype.h>
#define INIT register char *sp = instring;
#define GETC() (*sp++)
#define PEEKC() (*sp)
#define UNGETC(c) (--sp)
#define RETURN(ptr) return
#define ERROR(code) \
{fprintf(stderr,"%s:ERR%d\n",instring,code);exit(177);}
# include <regexp.h>
#define EOL '\0' /* end of line */
#define ESIZE 512
int matchReg(char *str, char *pattern){
static char oldPattern[256];
static char compiledExpr[ESIZE];
if( strcmp(pattern, oldPattern)){ /* razlichny */
/* compile regular expression */
compile(pattern,
compiledExpr, &compiledExpr[ESIZE], EOL);
A. Bogatyrev, 1992-95 - 313 - Si v UNIX
strcpy(oldPattern, pattern); /* zapomnit' */
}
return step(str, compiledExpr); /* sopostavit' */
}
/* Primer vyzova: reg '^int' 'int$' char | less */
/* reg 'putchar.*(.*)' < reg.c | more */
void main(int ac, char **av){
char inputline[BUFSIZ]; register i;
while(gets(inputline)){
for(i=1; i < ac; i++)
if(matchReg(inputline, av[i])){
char *p; extern char *loc1, *loc2;
/*printf("%s\n", inputline);*/
/* Napechatat' stroku,
* vydelyaya sopostavivshuyusya chast' zhirno */
for(p=inputline; p != loc1; p++) putchar(*p);
for( ; p != loc2; p++)
if(isspace((unsigned char) *p))
putchar(*p);
else printf("%c\b%c", *p, *p);
for( ; *p; p++) putchar(*p);
putchar('\n');
break;
}
}
}
7.43. Ispol'zuya <regexp.h> napishite programmu, proizvodyashchuyu kontekstnuyu zamenu vo
vseh strokah fajla. Esli stroka ne udovletvoryaet regulyarnomu vyrazheniyu - ona ostaetsya
neizmennoj. Primery vyzova:
$ regsub '\([0-9]\{1,\}\)' '(\1)'
$ regsub 'f(\(.*\),\(.*\))' 'f(\2,\1)' < file
Vtoraya komanda dolzhna zamenyat' vse vhozhdeniya f(a,b) na f(b,a). Vyrazhenie, oboznachen-
noe v obrazce kak \(...\), podstavlyaetsya na mesto sootvetstvuyushchej konstrukcii \N vo
vtorom argumente, gde N - cifra, nomer segmenta. CHtoby pomestit' v vyhod sam simvol
\, ego nado udvaivat': \\.
A. Bogatyrev, 1992-95 - 314 - Si v UNIX
/* Kontekstnaya zamena */
#include <stdio.h>
#include <ctype.h>
#define INIT register char *sp = instring;
#define GETC() (*sp++)
#define PEEKC() (*sp)
#define UNGETC(c) (--sp)
#define RETURN(ptr) return
#define ERROR(code) regerr(code)
void regerr();
# include <regexp.h>
#define EOL '\0' /* end of line */
#define ESIZE 512
short all = 0;
/* klyuch -a oznachaet, chto v stroke nado zamenit' VSE vhozhdeniya obrazca (global, all):
* regsub -a int INT
* "aa int bbb int cccc" -> "aa INT bbb INT cccc"
*
* step() nahodit SAMUYU DLINNUYU podstroku, udovletvoryayushchuyu vyrazheniyu,
* poetomu regsub 'f(\(.*\),\(.*\))' 'f(\2,\1)'
* zamenit "aa f(1,2) bb f(3,4) cc" -> "aa f(4,1,2) bb f(3) cc'
* |___________|_| |_|___________|
*/
char compiled[ESIZE], line[512];
A. Bogatyrev, 1992-95 - 315 - Si v UNIX
void main(int ac, char *av[]){
register char *s, *p; register n; extern int nbra;
extern char *braslist[], *braelist[], *loc1, *loc2;
if( ac > 1 && !strcmp(av[1], "-a")){ ac--; av++; all++; }
if(ac != 3){
fprintf(stderr, "Usage: %s [-a] pattern subst\n", av[0]);
exit(1);
}
compile(av[1], compiled, compiled + sizeof compiled, EOL);
while( gets(line) != NULL ){
if( !step(s = line, compiled)){
printf("%s\n", line); continue;
}
do{
/* Pechataem nachalo stroki */
for( ; s != loc1; s++) putchar(*s);
/* Delaem zamenu */
for(s=av[2]; *s; s++)
if(*s == '\\'){
if(isdigit(s[1])){ /* segment */
int num = *++s - '1';
if(num < 0 || num >= nbra){
fprintf(stderr, "Bad block number %d\n", num+1);
exit(2);
}
for(p=braslist[num]; p != braelist[num]; ++p)
putchar(*p);
} else if(s[1] == '&'){
++s; /* vsya sopostavlennaya stroka */
for(p=loc1; p != loc2; ++p)
putchar(*p);
} else putchar(*++s);
} else putchar(*s);
} while(all && step(s = loc2, compiled));
/* Ostatok stroki */
for(s=loc2; *s; s++) putchar(*s);
putchar('\n');
} /* endwhile */
}
A. Bogatyrev, 1992-95 - 316 - Si v UNIX
void regerr(int code){ char *msg;
switch(code){
case 11: msg = "Range endpoint too large."; break;
case 16: msg = "Bad number."; break;
case 25: msg = "\\digit out of range."; break;
case 36: msg = "Illegal or missing delimiter."; break;
case 41: msg = "No remembered search string."; break;
case 42: msg = "\\(~\\) imbalance."; break;
case 43: msg = "Too many \\(."; break;
case 44: msg = "More than 2 numbers given in \\{~\\\"}."; break;
case 45: msg = "} expected after \\."; break;
case 46: msg = "First number exceeds second in \\{~\\}."; break;
case 49: msg = "[ ] imbalance."; break;
case 50: msg = "Regular expression overflow."; break;
default: msg = "Unknown error"; break;
} fputs(msg, stderr); fputc('\n', stderr); exit(code);
}
void prfields(){
int i;
for(i=0; i < nbra; i++)
prfield(i);
}
void prfield(int n){
char *fbeg = braslist[n], *fend = braelist[n];
printf("\\%d='", n+1);
for(; fbeg != fend; fbeg++)
putchar(*fbeg);
printf("'\n");
}
7.44. Sostav'te funkciyu poiska podstroki v stroke. Ispol'zuya ee, napishite programmu
poiska podstroki v tekstovom fajle. Programma dolzhna vyvodit' stroki (libo nomera
strok) fajla, v kotoryh vstretilas' dannaya podstroka. Podstroka zadaetsya v kachestve
argumenta funkcii main().
/* Algoritm bystrogo poiska podstroki.
* Dzh. Mur, R. Bojer, 1976 Texas
* Smotri: Communications of the ACM 20, 10 (Oct., 1977), 762-772
*
* |tot algoritm vygoden pri mnogokratnom poiske obrazca v
* bol'shom kolichestve strok, prichem esli oni ravnoj dliny -
* mozhno sekonomit' eshche i na operacii strlen(str).
* Algoritm harakteren tem, chto pri neudache proizvodit sdvig ne na
* odin, a srazu na neskol'ko simvolov vpravo.
* V luchshem sluchae algoritm delaet slen/plen sravnenij.
*/
char *pattern; /* obrazec (chto iskat') */
static int plen; /* dlina obrazca */
static int d[256]; /* tablica sdvigov; v alfavite ASCII -
* 256 bukv. */
/* rasstoyanie ot konca obrazca do pozicii i v nem */
#define DISTANCE(i) ((plen-1) - (i))
A. Bogatyrev, 1992-95 - 317 - Si v UNIX
/* Poisk:
* vydat' indeks vhozhdeniya pattern v str,
* libo -1, esli ne vhodit
*/
int indexBM( str ) char *str; /* v chem iskat' */
{
int slen = strlen(str); /* dlina stroki */
register int pindx; /* indeks sravnivaemoj bukvy v obrazce */
register int cmppos; /* indeks sravnivaemoj bukvy v stroke */
register int endpos; /* poziciya v stroke, k kotoroj "pristavlyaetsya"
* poslednyaya bukva obrazca */
/* poka obrazec pomeshchaetsya v ostatok stroki */
for( endpos = plen-1; endpos < slen ; ){
/* Dlya otladki: pr(str, pattern, endpos - (plen-1), 0); /**/
/* prosmotr obrazca ot konca k nachalu */
for( cmppos = endpos, pindx = (plen - 1);
pindx >= 0 ;
cmppos--, pindx-- )
if( str[cmppos] != pattern[pindx] ){
/* Sdvig, kotoryj stavit samyj pravyj v obrazce
* simvol str[endpos] kak raz pod endpos-tuyu
* poziciyu stroki. Esli zhe takoj simvol v obrazce ne
* soderzhitsya (ili soderzhitsya tol'ko na konce),
* to nachalo obrazca ustanavlivaetsya v endpos+1 uyu
* poziciyu
*/
endpos += d[ str[endpos] & 0377 ];
break; /* & 0377 podavlyaet rasshirenie znaka. Eshche */
} /* mozhno sdelat' vse char -> unsigned char */
if( pindx < 0 ) return ( endpos - (plen-1));
/* Nashel: ves' obrazec vlozhilsya */
}
return( -1 ); /* Ne najdeno */
}
A. Bogatyrev, 1992-95 - 318 - Si v UNIX
/* Razmetka tablicy sdvigov */
void compilePatternBM( ptrn ) char *ptrn; {
register int c;
pattern = ptrn; plen = strlen(ptrn);
/* c - nomer bukvy alfavita */
for(c = 0; c < 256; c++)
d[c] = plen;
/* sdvig na dlinu vsego obrazca */
/* c - poziciya v obrazce */
for(c = 0; c < plen - 1; c++)
d[ pattern[c] & 0377 ] = DISTANCE(c);
/* Sdvig raven rasstoyaniyu ot samogo pravogo
* (krome poslednej bukvy obrazca)
* vhozhdeniya bukvy v obrazec do konca obrazca.
* Zametim, chto esli bukva vhodit v obrazec neskol'ko raz,
* to cikl uchityvaet poslednee (samoe pravoe) vhozhdenie.
*/
}
/* Pechat' najdennyh strok */
void pr(s, p, n, nl) char *s, *p;
{
register i;
printf("%4d\t%s\n", nl, s );
printf(" \t");
for(i = 0; i < n; i++ )
putchar( s[i] == '\t' ? '\t' : ' ' );
printf( "%s\n", p );
}
/* Analog programmy fgrep */
#include <stdio.h>
char str[ 1024 ]; /* bufer dlya prochitannoj stroki */
void main(ac, av) char **av;
{
int nline = 0; /* nomer stroki fajla */
int ind;
int retcode = 1;
if(ac != 2){
fprintf(stderr, "Usage: %s 'pattern'\n", av[0] );
exit(33);
}
compilePatternBM( av[1] );
while( gets(str) != NULL ){
nline++;
if((ind = indexBM(str)) >= 0 ){
retcode = 0; /* O'KAY */
pr(str, pattern, ind, nline);
}
}
exit(retcode);
}
A. Bogatyrev, 1992-95 - 319 - Si v UNIX
/* Primer raboty algoritma:
peter piper picked a peck of pickled peppers.
peck
peter piper picked a peck of pickled peppers.
peck
peter piper picked a peck of pickled peppers.
peck
peter piper picked a peck of pickled peppers.
peck
peter piper picked a peck of pickled peppers.
peck
peter piper picked a peck of pickled peppers.
peck
peter piper picked a peck of pickled peppers.
peck
peter piper picked a peck of pickled peppers.
peck
*/
7.45. Napishite analogichnuyu programmu, vydayushchuyu vse stroki, udovletvoryayushchie uproshchen-
nomu regulyarnomu vyrazheniyu, zadavaemomu kak argument dlya main(). Ispol'zujte funkciyu
match, napisannuyu nami ranee. Vy napisali analog programmy grep iz UNIX (no s drugim
tipom regulyarnogo vyrazheniya, nezheli v originale).
7.46. Sostav'te funkciyu expand(s1, s2), kotoraya rasshiryaet sokrashchennye oboznacheniya
vida a-z stroki s1 v ekvivalentnyj polnyj spisok abcd...xyz v stroke s2. Dopuskayutsya
sokrashcheniya dlya strochnyh i propisnyh bukv i cifr. Uchtite sluchai tipa a-b-c, a-z0-9 i
-a-g (soglashenie sostoit v tom, chto simvol "-", stoyashchij v nachale ili v konce, vospri-
nimaetsya bukval'no).
7.47. Napishite programmu, chitayushchuyu fajl i zamenyayushchuyu stroki vida
|<1 i bolee probelov i tabulyacij><tekst>
na pary strok
|.pp
|<tekst>
(zdes' | oboznachaet levyj kraj fajla, a <> - metasimvoly). |to - prostejshij prepro-
cessor, gotovyashchij tekst v formate nroff (eto formatter tekstov v UNIX). Uslozhneniya:
- stroki, nachinayushchiesya s tochki ili s apostrofa, zamenyat' na
\&<tekst, nachinayushchijsya s tochki ili '>
- stroki, nachinayushchiesya s cifry, zamenyat' na
.ip <chislo>
<tekst>
- simvol \ zamenyat' na posledovatel'nost' \e.
- udalyat' probely pered simvolami .,;:!?) i vstavlyat' posle nih probel (znak pre-
pinaniya dolzhen byt' prikleen k koncu slova, inache on mozhet byt' perenesen na
sleduyushchuyu stroku. Vy kogda-nibud' videli stroku, nachinayushchuyusya s zapyatoj?).
- skleivat' perenesennye slova, poskol'ku nroff delaet perenosy sam:
....xxxx nachalo- => ....xxxx nachalokonec
konec yyyy...... yyyy................
A. Bogatyrev, 1992-95 - 320 - Si v UNIX
Vyzyvajte etot preprocessor razmetki teksta tak:
$ prep fajly... | nroff -me > text.lp
7.48. Sostav'te programmu preobrazovaniya propisnyh bukv iz fajla vvoda v strochnye,
ispol'zuya pri etom funkciyu, v kotoroj neobhodimo organizovat' analiz simvola (dejst-
vitel'no li eto bukva). Strochnye bukvy vydavat' bez izmeneniya. Ukazanie: ispol'zujte
makrosy iz <ctype.h>.
Otvet:
#include <ctype.h>
#include <stdio.h>
main(){
int c;
while( (c = getchar()) != EOF )
putchar( isalpha( c ) ?
(isupper( c ) ? tolower( c ) : c) : c);
}
libo ...
putchar( isalpha(c) && isupper(c) ? tolower(c) : c );
libo dazhe
putchar( isupper(c) ? tolower(c) : c );
V poslednem sluchae pod isupper i islower dolzhny ponimat'sya tol'ko bukvy (uvy, ne vo
vseh realizaciyah eto tak!).
7.49. Obratite vnimanie, chto esli my vydelyaem klass simvolov pri pomoshchi sravneniya,
naprimer:
char ch;
if( 0300 <= ch && ch < 0340 ) ...;
(v kodirovke KOI-8 eto malen'kie russkie bukvy), to my mozhem natolknut'sya na sleduyu-
shchij syurpriz: pered sravneniem s celym znachenie ch privoditsya k tipu int (privedenie
takzhe delaetsya pri ispol'zovanii char v kachestve argumenta funkcii). Pri etom, esli
u ch byl ustanovlen starshij bit (0200), proizojdet rasshirenie ego vo ves' starshij
bajt (rasshirenie znakovogo bita). Rezul'tatom budet otricatel'noe celoe chislo! Opyt:
char c = '\201'; /* = 129 */
printf( "%d\n", c );
pechataetsya -127. Takim obrazom, nashe sravnenie ne srabotaet, t.k. okazyvaetsya chto ch
< 0. Sleduet podavlyat' rasshirenie znaka:
if( 0300 <= (ch & 0377) && (ch & 0377) < 0340) ...;
(0377 - maska iz 8 bit, ona zhe 0xFF, ves' bajt), libo ob®yavit'
unsigned char ch;
chto oznachaet, chto pri privedenii k int znakovyj bit ne rasshiryaetsya.
7.50. Rassmotrim eshche odin primer:
A. Bogatyrev, 1992-95 - 321 - Si v UNIX
main(){
char ch;
/* 0377 - kod poslednego simvola alfavita ASCII */
for (ch = 0100; ch <= 0377; ch++ )
printf( "%03o %s\n",
ch & 0377,
ch >= 0300 && ch < 0340 ? "yes" : "no" );
}
Kakie nepriyatnosti zhdut nas zdes'?
- vo-pervyh, kogda bit 0200 u ch ustanovlen, v sravnenii ch vystupaet kak otrica-
tel'noe celoe chislo (t.k. privedenie k int delaetsya rasshireniem znakovogo bita),
to est' u nas vsegda pechataetsya "no". |to my mozhem ispravit', napisav
unsigned char ch, libo ispol'zuya ch v vide
(ch & 0377) ili ((unsigned) ch)
- vo-vtoryh, rassmotrim sam cikl. Pust' sejchas ch =='\377'. Uslovie ch <= 0377
istinno. Vypolnyaetsya operator ch++. No ch - eto bajt, poetomu operacii nad nim
proizvodyatsya po modulyu 0400 (0377 - eto maksimal'noe znachenie, kotoroe mozhno
hranit' v bajte - vse bity edinicy). To est' teper' znacheniem ch stanet 0. No
0 < 0377 i uslovie cikla verno! Cikl prodolzhaetsya; t.e. proishodit zacikliva-
nie. Izbezhat' etogo mozhno tol'ko opisav int ch; chtoby 0377+1 bylo ravno 0400, a
ne 0 (ili unsigned int, lish' by dliny peremennoj hvatalo, chtoby vmestit' chislo
bol'she 0377).
7.51. Sostav'te programmu, preobrazuyushchuyu tekst, sostoyashchij tol'ko iz strochnyh bukv v
tekst, sostoyashchij iz propisnyh i strochnyh bukv. Pervaya bukva i bukva posle kazhdoj
tochki - propisnye, ostal'nye - strochnye.
slovo odin. slovo dva. -->
Slovo odin. Slovo dva.
|ta programma mozhet okazat'sya poleznoj dlya preobrazovaniya teksta, nabrannogo v odnom
registre, v tekst, soderzhashchij bukvy oboih registrov.
7.52. Napishite programmu, ispravlyayushchuyu opechatki v slovah (spell check): programme
zadan spisok slov; ona proveryaet - yavlyaetsya li vvedennoe vami slovo slovom iz spiska.
Esli net - pytaetsya najti naibolee pohozhee slovo iz spiska, prichem esli est' nes-
kol'ko pohozhih - vydaet vse varianty. Otlavlivajte sluchai:
- dve sosednie bukvy perestavleny mestami: nozhincy=>nozhnicy;
- udvoennaya bukva (bukvy): kkarrandash=>karandash;
- poteryana bukva: bot=>bolt;
- izmenennaya bukva: bint=>bant;
- lishnyaya bukva: morda=>moda;
- bukvy ne v tom registre - sravnite s kazhdym slovom iz spiska, privodya vse bukvy
k malen'kim: sOVOk=>sovok;
Nado proveryat' kazhduyu bukvu slova. Vozmozhno vam budet udobno ispol'zovat' rekursiyu.
Podskazka: dlya nekotoryh proverok vam mozhet pomoch' funkciya match:
slovo_tablicy = "dom";
if(strlen(vhodnoe_slovo) <= strlen(slovo_tablicy)+1 &&
match(vhodnoe_slovo, "*d*o*m*") ... /* pohozhe */
*o*m* ?dom dom?
*d*m* d?om
*d*o* do?m
Privedem variant resheniya etoj zadachi:
A. Bogatyrev, 1992-95 - 322 - Si v UNIX
#include <stdio.h>
#include <ctype.h>
#include <locale.h>
typedef unsigned char uchar;
#define ANYCHAR '*'
/* simvol, sopostavlyayushchijsya s odnoj lyuboj bukvoj */
static uchar version[120]; /* bufer dlya generacii variantov */
static uchar vv; /* bukva, sopostavivshayasya s ANYCHAR */
/* privesti vse bukvy k odnomu registru */
static uchar icase(uchar c){
return isupper(c) ? tolower(c) : c;
}
/* sravnenie strok s ignorirovaniem registra */
static int eqi(uchar *s1, uchar *s2 )
{
while( *s1 && *s2 ){
if( icase( *s1 ) != icase( *s2 ))
break;
s1++; s2++;
}
return ( ! *s1 && ! *s2 ) ? 1 : 0 ;
/* OK : FAIL */
}
/* sravnenie strok s ignorirovaniem ANYCHAR */
static strok(register uchar *word, register uchar *pat)
{
while( *word && *pat ){
if( *word == ANYCHAR){
/* Nevazhno, chto est' *pat, no zapomnim */
vv= *pat;
} else {
if( icase(*pat) != icase(*word) )
break;
}
word++; pat++;
}
/* esli slova konchilis' odnovremenno ... */
return ( !*word && !*pat) ? 1 : 0;
/* OK : FAIL */
}
A. Bogatyrev, 1992-95 - 323 - Si v UNIX
/* LISHNYAYA BUKVA */
static int superfluous( uchar *word /* slovo dlya korrekcii */
, uchar *s /* etalon */
){
register int i,j,k;
int reply;
register len = strlen(word);
for(i=0 ; i < len ; i++){
/* generim slova , poluchayushchiesya udaleniem odnoj bukvy */
k=0;
for(j=0 ; j < i ; j++)
version[k++]=word[j];
for(j=i+1 ; j < len ; j++)
version[k++]=word[j];
version[k]='\0';
if( eqi( version, s )) return 1; /* OK */
}
return 0; /* FAIL */
}
/* POTERYANA BUKVA */
static int hole; /* mesto, gde vstavlena ANYCHAR */
static int lost(uchar *word, uchar *s)
{
register int i,j,k;
register len = strlen(word);
hole= (-1);
for(i=0 ; i < len+1 ; i++){
k=0;
for(j=0 ; j < i ; j++)
version[k++]=word[j];
version[k++]=ANYCHAR;
for(j=i ; j < len ; j++)
version[k++]=word[j];
version[k]='\0';
if( strok( version, s )){
hole=i;
return 1; /* OK */
}
}
return 0; /* FAIL */
}
A. Bogatyrev, 1992-95 - 324 - Si v UNIX
/* IZMENILASX ODNA BUKVA (vklyuchaet sluchaj oshibki registra) */
static int changed(uchar *word, uchar *s)
{
register int i,j,k;
register len = strlen(word);
hole = (-1);
for(i=0 ; i < len ; i++){
k=0;
for( j=0 ; j < i ; j++)
version[k++]=word[j];
version[k++]=ANYCHAR;
for( j=i+1 ; j < len ; j++)
version[k++]=word[j];
version[k]='\0';
if( strok( version,s)){
hole=i;
return 1; /* OK */
}
}
return 0; /* FAIL */
}
/* UDVOENNAYA BUKVA */
static int duplicates(uchar *word, uchar *s, int leng)
{
register int i,j,k;
uchar tmp[80];
if( eqi( word, s )) return 1; /* OK */
for(i=0;i < leng - 1; i++)
/* ishchem parnye bukvy */
if( word[i]==word[i+1]){
k=0;
for(j=0 ; j < i ; j++)
tmp[k++]=word[j];
for(j=i+1 ; j < leng ; j++)
tmp[k++]=word[j];
tmp[k]='\0';
if( duplicates( tmp, s, leng-1) == 1)
return 1; /* OK */
}
return 0; /* FAIL */
}
A. Bogatyrev, 1992-95 - 325 - Si v UNIX
/* PERESTAVLENY SOSEDNIE BUKVY */
static int swapped(uchar *word, uchar *s)
{
register int i,j,k;
register len = strlen(word);
for(i=0;i < len-1;i++){
k=0;
for(j=0 ; j < i ; j++)
version[k++]=word[j];
version[k++]=word[i+1];
version[k++]=word[i];
for(j=i+2 ; j < len ; j++)
version[k++]=word[j];
version[k]='\0';
if( eqi( version, s))
return 1; /* OK */
}
return 0; /* FAIL */
}
uchar *words[] = {
(uchar *) "bag",
(uchar *) "bags",
(uchar *) "cook",
(uchar *) "cool",
(uchar *) "bug",
(uchar *) "buy",
(uchar *) "cock",
NULL
};
#define Bcase(x, operators) case x: { operators; } break;
char *cname[5] = {
"perestavleny bukvy",
"udvoeny bukvy ",
"poteryana bukva ",
"oshibochnaya bukva ",
"lishnyaya bukva "
};
A. Bogatyrev, 1992-95 - 326 - Si v UNIX
static int spellmatch( uchar *word /* IN slovo dlya korrekcii */
, uchar *words[] /* IN tablica dopustimyh slov */
, uchar **indx /* OUT otvet */
){
int i, code, total = (-1);
uchar **ptr;
if(!*word) return -1;
for(ptr = words; *ptr; ++ptr)
if(eqi(word, *ptr)){
if(indx) *indx = *ptr;
return 0;
}
/* Net v tablice, nuzhen podbor pohozhih */
for(ptr = words; *ptr; ++ptr){
uchar *s = *ptr;
int max = 5;
for(i=0; i < max; i++){
switch( i ){
Bcase(0,code = swapped(word, s) )
Bcase(1,code = duplicates(word, s, strlen(word)) )
Bcase(2,code = lost(word, s) )
Bcase(3,code = changed(word, s) )
Bcase(4,code = superfluous(word, s) )
}
if(code){
total++;
printf("?\t%s\t%s\n", cname[i], s);
if(indx) *indx = s;
/* V sluchae s dublikatami ne rassmatrivat'
* na nalichie lishnih bukv
*/
if(i==1) max = 4;
}
}
}
return total;
}
A. Bogatyrev, 1992-95 - 327 - Si v UNIX
void main(){
uchar inpbuf[BUFSIZ];
int n;
uchar *reply, **ptr;
setlocale(LC_ALL, "");
for(ptr = words; *ptr; ptr++)
printf("#\t%s\n", *ptr);
do{
printf("> "); fflush(stdout);
if(gets((char *)inpbuf) == NULL) break;
switch(spellmatch(inpbuf, words, &reply)){
case -1:
printf("Net takogo slova\n"); break;
case 0:
printf("Slovo '%s'\n", reply); break;
default:
printf("Neodnoznachno\n");
}
} while(1);
}
7.53. Poka ya sam pisal etu programmu, ya sdelal dve oshibki, kotorye dolzhny byt'
ves'ma harakterny dlya novichkov. Pro nih nado by govorit' ran'she, v glave pro stroki i
v samoj pervoj glave, no tut oni prishlis' kak raz k mestu. Vopros: chto pechataet sle-
duyushchaya programma?
#include <stdio.h>
char *strings[] = {
"Pervaya stroka"
"Vtoraya stroka"
"Tretyaya stroka",
"CHetvertaya stroka",
NULL
};
void main(){
char **p;
for(p=strings;*p;++p)
printf("%s\n", *p);
}
A pechataet ona vot chto:
Pervaya strokaVtoraya strokaTretyaya stroka
CHetvertaya stroka
Delo v tom, chto ANSI kompilyator Si skleivaet stroki:
"nachalo stroki" "i ee konec"
esli oni razdeleny probelami v smysle isspace, v tom chisle i pustymi strokami. A v
nashem ob®yavlenii massiva strok strings my pot