AWK is a very powerful tool for data parsing and extraction. It is built into most Unix-like operating systems.
Built-in Variables:
$0: entire record
$1: first field
$2: second field
FILENAME: input file name
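length($1): length of the first field
length: length of the whole record, including the field separators inside it (the record separator is not counted)
NR: number of records read so far (the current record number)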
NF: number of fields in the current record
RS: input record (row) separator, default "\n"
FS: input field (column) separator, default whitespace
ORS: output record separator
OFS: output field separator
Syntax:
# inline style
$ awk '{print $0}' inputFile

# use pipe to pass stdout
$ ps | awk '{print $0}'

# with different separator
$ echo "1|2|3" | awk -F '|' '{print $1}'
Run awk script
$ awk -f script.awk inputFile
Script Pattern
#!/usr/bin/awk -f
# Skeleton of a standalone awk program. The shebang must be on a line of
# its own; each rule below starts on its own line.

BEGIN {
    # preprocessing: runs once, before the first record is read
}

{
    # main process: runs once for every input record
}

END {
    # post-processing: runs once, after the last record has been read
}
Count lines (wc)
$ awk 'END {print NR}' input
Count characters (field separators included, newlines not counted)
awk '{c += length} END {print "Sum:", c}' input
Count word frequency
INPUT
a b c
b c d
c d e
f
g
OUTPUT
./script input | sort
1 a
1 e
1 f
1 g
2 b
2 d
3 c
script
#!/usr/bin/awk -f
# Count how often each whitespace-separated word occurs in the input,
# ignoring case. Prints one "<count> <word>" line per distinct word.
# The for-in traversal order is unspecified, so pipe the output through
# sort when a stable order is needed.

{
    # Visit every field of the record. The bound must be <= NF:
    # with < NF the last field is skipped, and a one-word line
    # (NF == 1) would not be counted at all.
    for (i = 1; i <= NF; i++)
        words[tolower($i)]++
}

END {
    for (w in words)
        print words[w], w
}
Count word frequency with multiple separators
INPUT
a b c
b,c,d
c,d,e
f,
g,
OUTPUT
./script input | sort
1 a
1 e
1 f
1 g
2 b
2 d
3 c
script
#!/usr/bin/awk -f
# Count how often each word occurs when words are separated by either a
# comma or a space, ignoring case. Prints one "<count> <word>" line per
# distinct word, in unspecified order (pipe through sort if needed).

BEGIN {
    # FS has to be a string. A value longer than one character is used
    # as a regular expression, so this splits on a comma or a space.
    FS = "[, ]"
}

{
    # <= NF so the last field of the record is included.
    for (i = 1; i <= NF; i++)
        if (length($i))    # skip empty fields, e.g. the one after the comma in "f,"
            words[tolower($i)]++
}

END {
    for (w in words)
        print words[w], w
}
Reshape array
INPUT
1
2
3
4
5
6
7
8
9
OUTPUT
1 2 3
4 5 6
7 8 9
script (printf picks OFS or ORS as the separator after each value)
#!/usr/bin/awk -f
# Reshape a stream of values into rows of three.
# Every field of every input record is one value. Values on the same
# output row are joined with OFS, and each complete row ends with ORS.

{
    # <= NF so that one-value lines (NF == 1) are processed, and $i
    # (not %i) to reference the i-th field.
    for (i = 1; i <= NF; i++)
        printf "%s%s", $i, (++count % 3 ? OFS : ORS)
}

END {
    # Terminate a final, partially filled row so the output always ends
    # with a record separator.
    if (count % 3)
        printf "%s", ORS
}
Auto next line (arrange the values into n rows, filled column by column)
INPUT
1
2
3
4
5
6
7
8
9
OUTPUT
./script n=5 input
1,6
2,7
3,8
4,9
5
script
#!/usr/bin/awk -f
# Arrange the input values into n rows, filling column by column.
# Usage: ./script n=ROWS input
# n is given as a command-line assignment placed before the input file,
# so it is set before that file is read and is available in END.
# Values on the same output row are joined with a comma.

BEGIN {
    OFS = ","
}

{
    # Collect every field of every record. <= NF so that one-value
    # lines (NF == 1) are stored.
    for (i = 1; i <= NF; i++)
        data[count++] = $i
}

END {
    # Without a positive n nothing would be printed at all; report the
    # mistake instead of failing silently.
    if (n < 1) {
        print "usage: script n=ROWS input" > "/dev/stderr"
        exit 1
    }
    # Row r holds the values at positions r, r+n, r+2n, ...
    for (row = 0; row < n; row++)
        for (j = row; j < count; j += n)
            printf "%s%s", data[j], (j + n < count ? OFS : ORS)
}
Filter Lines
Input
123
456
789
Error: aaaaa
5
Output
123
456
789
5
script
#!/usr/bin/awk -f
# Print every input line that does not contain the text "Error".
# The shebang has to be on a line of its own: anything after "#" on that
# line is a comment, so a brace fused onto it is lost.

$0 !~ /Error/ { print }
Inline syntax
awk '/pattern/ { actions }'

# BEGIN and END each run only once; the rules in between run once for every record (each RS-delimited line) that matches their pattern
awk 'BEGIN { actions }
/pattern/ { actions }
/pattern/ { actions }
{ actions }
END { actions }'
Examples
# Print the first word of every line that starts with I
awk '/^I/ {print $1}' content.txt
I123
Iphone

# Print every line that contains avg=
awk '{if ($0 ~ "avg=") print $0}' content.txt

# Print the last matching line: the first block runs for every line, END runs only once at the end
awk '{if ($0 ~ "avg=") data[count++] = $0} END {print data[count-1]}' content.txt

# Set FS to the search string, then pick out the wanted text by field number
echo "abc=123" | awk 'BEGIN {FS="abc="} {if (NF > 1) print $2}'
123