Sorting hash values by length of lines in text files
up vote
1
down vote
favorite
Read each line in file1; if the line does not exist in file2, write it to a new file.
The comparison covers only the length of the hash string (the first 38 characters of each line).
Hash values in file1:
cf03189f5b05eb1a9658f80d7a0e9f02:_#.g}
edbe6de8b3ee19b45e092147f57af7b8:]mNon
47253940f843f258ffd265d13f365d70:/u'yv
5701aa8e2aa7e1cfd16ca4076bd1732a:@AQ1z
b3c0866e6fd56776bc4a18d3c87cc725:t$5OV
7a1e6090568e076c55df9dc7abf356c6:9rC@p
04046da33706518d9b15a38bcddb448e:!DFPk
Hash values in file2:
edbe6de8b3ee19b45e092147f57af7b8:]mNon:str1
47253940f843f258ffd265d13f365d70:/u'yv:2str
b3c0866e6fd56776bc4a18d3c87cc725:t$5OV:3str1ng
This is a working example written in C:
#include <stdio.h>
#include <string.h>
#define LINE_LENGTH 80
typedef enum {FALSE=0,TRUE=1} BOOL;
int main(int argc, char *argv)
{
FILE *fpin1 = NULL;
FILE *fpin2 = NULL;
FILE *fpout = NULL;
char line [LINE_LENGTH]={0};
char line1[LINE_LENGTH]={0};
BOOL bCheck = FALSE;
size_t ncHash = 0;
size_t count = 0;
if (argc != 4)
{
printf("Usage:%s <file1> <file2> <OutFile>n", argv[0]);
return 1;
}
/*
Opening input files (file1 and file2) for reading in text mode.
The output file (OutFile) is open for writing.
*/
if(((fpin1=fopen(argv[1],"r"))==NULL) ||
((fpin2=fopen(argv[2],"r"))==NULL) ||
((fpout=fopen(argv[3],"w"))==NULL))
{
printf("Error! Could not open filesn");
return 1;
}
while(fgets(line, sizeof(line), fpin1)!=NULL) /* Read hash line from the first file1 */
{
bCheck=FALSE;
while(fgets(line1, sizeof(line1), fpin2)!=NULL) /* Read hash line from the second file2 */
{
if(!strncmp(line, line1, 38)) /* Compares 38 characters of the line in file1 to those of the file2 */
{
bCheck=!bCheck;
break;
}
}
if(!bCheck) /* Does compared line are the same ? */
{
fputs(line,fpout); /* Yes - write them in a file OutFile */
ncHash++; /* Count identical lines */
}
rewind(fpin2); /* Seek to the beginning of the file2 */
count++; /* Counting the read lines in file1 */
}
printf("nDone...n");
fclose( fpin1);
fclose( fpin2);
fclose( fpout);
return 0;
}
The OutFile from the program is:
cf03189f5b05eb1a9658f80d7a0e9f02:_#.g}
5701aa8e2aa7e1cfd16ca4076bd1732a:@AQ1z
7a1e6090568e076c55df9dc7abf356c6:9rC@p
04046da33706518d9b15a38bcddb448e:!DFPk
- I want to know how to write it in
awk
?
regex perl sed awk pattern-matching
add a comment |
up vote
1
down vote
favorite
Read each line in file1; if the line does not exist in file2, write it to a new file.
The comparison covers only the length of the hash string (the first 38 characters of each line).
Hash values in file1:
cf03189f5b05eb1a9658f80d7a0e9f02:_#.g}
edbe6de8b3ee19b45e092147f57af7b8:]mNon
47253940f843f258ffd265d13f365d70:/u'yv
5701aa8e2aa7e1cfd16ca4076bd1732a:@AQ1z
b3c0866e6fd56776bc4a18d3c87cc725:t$5OV
7a1e6090568e076c55df9dc7abf356c6:9rC@p
04046da33706518d9b15a38bcddb448e:!DFPk
Hash values in file2:
edbe6de8b3ee19b45e092147f57af7b8:]mNon:str1
47253940f843f258ffd265d13f365d70:/u'yv:2str
b3c0866e6fd56776bc4a18d3c87cc725:t$5OV:3str1ng
This is a working example written in C:
#include <stdio.h>
#include <string.h>
#define LINE_LENGTH 80
typedef enum {FALSE=0,TRUE=1} BOOL;
int main(int argc, char *argv)
{
FILE *fpin1 = NULL;
FILE *fpin2 = NULL;
FILE *fpout = NULL;
char line [LINE_LENGTH]={0};
char line1[LINE_LENGTH]={0};
BOOL bCheck = FALSE;
size_t ncHash = 0;
size_t count = 0;
if (argc != 4)
{
printf("Usage:%s <file1> <file2> <OutFile>n", argv[0]);
return 1;
}
/*
Opening input files (file1 and file2) for reading in text mode.
The output file (OutFile) is open for writing.
*/
if(((fpin1=fopen(argv[1],"r"))==NULL) ||
((fpin2=fopen(argv[2],"r"))==NULL) ||
((fpout=fopen(argv[3],"w"))==NULL))
{
printf("Error! Could not open filesn");
return 1;
}
while(fgets(line, sizeof(line), fpin1)!=NULL) /* Read hash line from the first file1 */
{
bCheck=FALSE;
while(fgets(line1, sizeof(line1), fpin2)!=NULL) /* Read hash line from the second file2 */
{
if(!strncmp(line, line1, 38)) /* Compares 38 characters of the line in file1 to those of the file2 */
{
bCheck=!bCheck;
break;
}
}
if(!bCheck) /* Does compared line are the same ? */
{
fputs(line,fpout); /* Yes - write them in a file OutFile */
ncHash++; /* Count identical lines */
}
rewind(fpin2); /* Seek to the beginning of the file2 */
count++; /* Counting the read lines in file1 */
}
printf("nDone...n");
fclose( fpin1);
fclose( fpin2);
fclose( fpout);
return 0;
}
The OutFile from the program is:
cf03189f5b05eb1a9658f80d7a0e9f02:_#.g}
5701aa8e2aa7e1cfd16ca4076bd1732a:@AQ1z
7a1e6090568e076c55df9dc7abf356c6:9rC@p
04046da33706518d9b15a38bcddb448e:!DFPk
- I want to know how to write it in
awk
?
regex perl sed awk pattern-matching
1
Look at the comm
or join
standard unix utilities.
– Stephane Chazelas
Jun 24 '13 at 14:32
join -v1 all <(cut -d: -f1,2 excludes)
– Kevin
Jun 24 '13 at 15:10
add a comment |
up vote
1
down vote
favorite
up vote
1
down vote
favorite
Read each line in file1; if the line does not exist in file2, write it to a new file.
The comparison covers only the length of the hash string (the first 38 characters of each line).
Hash values in file1:
cf03189f5b05eb1a9658f80d7a0e9f02:_#.g}
edbe6de8b3ee19b45e092147f57af7b8:]mNon
47253940f843f258ffd265d13f365d70:/u'yv
5701aa8e2aa7e1cfd16ca4076bd1732a:@AQ1z
b3c0866e6fd56776bc4a18d3c87cc725:t$5OV
7a1e6090568e076c55df9dc7abf356c6:9rC@p
04046da33706518d9b15a38bcddb448e:!DFPk
Hash values in file2:
edbe6de8b3ee19b45e092147f57af7b8:]mNon:str1
47253940f843f258ffd265d13f365d70:/u'yv:2str
b3c0866e6fd56776bc4a18d3c87cc725:t$5OV:3str1ng
This is a working example written in C:
#include <stdio.h>
#include <string.h>
#define LINE_LENGTH 80
typedef enum {FALSE=0,TRUE=1} BOOL;
int main(int argc, char *argv)
{
FILE *fpin1 = NULL;
FILE *fpin2 = NULL;
FILE *fpout = NULL;
char line [LINE_LENGTH]={0};
char line1[LINE_LENGTH]={0};
BOOL bCheck = FALSE;
size_t ncHash = 0;
size_t count = 0;
if (argc != 4)
{
printf("Usage:%s <file1> <file2> <OutFile>n", argv[0]);
return 1;
}
/*
Opening input files (file1 and file2) for reading in text mode.
The output file (OutFile) is open for writing.
*/
if(((fpin1=fopen(argv[1],"r"))==NULL) ||
((fpin2=fopen(argv[2],"r"))==NULL) ||
((fpout=fopen(argv[3],"w"))==NULL))
{
printf("Error! Could not open filesn");
return 1;
}
while(fgets(line, sizeof(line), fpin1)!=NULL) /* Read hash line from the first file1 */
{
bCheck=FALSE;
while(fgets(line1, sizeof(line1), fpin2)!=NULL) /* Read hash line from the second file2 */
{
if(!strncmp(line, line1, 38)) /* Compares 38 characters of the line in file1 to those of the file2 */
{
bCheck=!bCheck;
break;
}
}
if(!bCheck) /* Does compared line are the same ? */
{
fputs(line,fpout); /* Yes - write them in a file OutFile */
ncHash++; /* Count identical lines */
}
rewind(fpin2); /* Seek to the beginning of the file2 */
count++; /* Counting the read lines in file1 */
}
printf("nDone...n");
fclose( fpin1);
fclose( fpin2);
fclose( fpout);
return 0;
}
The OutFile from the program is:
cf03189f5b05eb1a9658f80d7a0e9f02:_#.g}
5701aa8e2aa7e1cfd16ca4076bd1732a:@AQ1z
7a1e6090568e076c55df9dc7abf356c6:9rC@p
04046da33706518d9b15a38bcddb448e:!DFPk
- I want to know how to write it in
awk
?
regex perl sed awk pattern-matching
Read each line in file1; if the line does not exist in file2, write it to a new file.
The comparison covers only the length of the hash string (the first 38 characters of each line).
Hash values in file1:
cf03189f5b05eb1a9658f80d7a0e9f02:_#.g}
edbe6de8b3ee19b45e092147f57af7b8:]mNon
47253940f843f258ffd265d13f365d70:/u'yv
5701aa8e2aa7e1cfd16ca4076bd1732a:@AQ1z
b3c0866e6fd56776bc4a18d3c87cc725:t$5OV
7a1e6090568e076c55df9dc7abf356c6:9rC@p
04046da33706518d9b15a38bcddb448e:!DFPk
Hash values in file2:
edbe6de8b3ee19b45e092147f57af7b8:]mNon:str1
47253940f843f258ffd265d13f365d70:/u'yv:2str
b3c0866e6fd56776bc4a18d3c87cc725:t$5OV:3str1ng
This is a working example written in C:
#include <stdio.h>
#include <string.h>
#define LINE_LENGTH 80
typedef enum {FALSE=0,TRUE=1} BOOL;
int main(int argc, char *argv)
{
FILE *fpin1 = NULL;
FILE *fpin2 = NULL;
FILE *fpout = NULL;
char line [LINE_LENGTH]={0};
char line1[LINE_LENGTH]={0};
BOOL bCheck = FALSE;
size_t ncHash = 0;
size_t count = 0;
if (argc != 4)
{
printf("Usage:%s <file1> <file2> <OutFile>n", argv[0]);
return 1;
}
/*
Opening input files (file1 and file2) for reading in text mode.
The output file (OutFile) is open for writing.
*/
if(((fpin1=fopen(argv[1],"r"))==NULL) ||
((fpin2=fopen(argv[2],"r"))==NULL) ||
((fpout=fopen(argv[3],"w"))==NULL))
{
printf("Error! Could not open filesn");
return 1;
}
while(fgets(line, sizeof(line), fpin1)!=NULL) /* Read hash line from the first file1 */
{
bCheck=FALSE;
while(fgets(line1, sizeof(line1), fpin2)!=NULL) /* Read hash line from the second file2 */
{
if(!strncmp(line, line1, 38)) /* Compares 38 characters of the line in file1 to those of the file2 */
{
bCheck=!bCheck;
break;
}
}
if(!bCheck) /* Does compared line are the same ? */
{
fputs(line,fpout); /* Yes - write them in a file OutFile */
ncHash++; /* Count identical lines */
}
rewind(fpin2); /* Seek to the beginning of the file2 */
count++; /* Counting the read lines in file1 */
}
printf("nDone...n");
fclose( fpin1);
fclose( fpin2);
fclose( fpout);
return 0;
}
The OutFile from the program is:
cf03189f5b05eb1a9658f80d7a0e9f02:_#.g}
5701aa8e2aa7e1cfd16ca4076bd1732a:@AQ1z
7a1e6090568e076c55df9dc7abf356c6:9rC@p
04046da33706518d9b15a38bcddb448e:!DFPk
- I want to know how to write it in
awk
?
regex perl sed awk pattern-matching
regex perl sed awk pattern-matching
edited Nov 11 at 7:02
Cœur
17k9102140
17k9102140
asked Jun 24 '13 at 14:29
boleto
7611627
7611627
1
Look at the comm
or join
standard unix utilities.
– Stephane Chazelas
Jun 24 '13 at 14:32
join -v1 all <(cut -d: -f1,2 excludes)
– Kevin
Jun 24 '13 at 15:10
add a comment |
1
Look at the comm
or join
standard unix utilities.
– Stephane Chazelas
Jun 24 '13 at 14:32
join -v1 all <(cut -d: -f1,2 excludes)
– Kevin
Jun 24 '13 at 15:10
1
1
Look at the
comm
or join
standard unix utilities.– Stephane Chazelas
Jun 24 '13 at 14:32
Look at the
comm
or join
standard unix utilities.– Stephane Chazelas
Jun 24 '13 at 14:32
join -v1 all <(cut -d: -f1,2 excludes)
– Kevin
Jun 24 '13 at 15:10
join -v1 all <(cut -d: -f1,2 excludes)
– Kevin
Jun 24 '13 at 15:10
add a comment |
4 Answers
4
active
oldest
votes
up vote
3
down vote
accepted
awk -F: 'NR==FNR{a[$1,$2];next}!(($1,$2) in a)' excludes.txt all.txt
Note the reversal of file argument order.
An explanation:
-F: - use : as a field separator
NR==FNR - The first file (current row number = total row number)
a[$1,$2] - Touch the array at the first two fields
next - move on to the next line, so we don't have to check the alternate NR != FNR condition
!(($1,$2) in a) - check whether the combination was seen. If not, print the line (default action)
Does not work for me(awk -W version: GNU Awk 3.1.8 Copyright © 1998, 1991-2010 Free Software Foundation.)
: outputs 7 lines..
– xtof pernod
Jun 24 '13 at 14:57
Works on mac/bsd awk, I'm checking but try replacing $1,$2
with $1":"$2
both places.
– Kevin
Jun 24 '13 at 15:00
Also works on mawk
, the default awk on ubuntu server 13.04
– Kevin
Jun 24 '13 at 15:03
Works on GNU Awk 4.1.0, API: 1.0
– Kevin
Jun 24 '13 at 15:05
script work properly, but must be careful with spaces. For example if you copy/paste lines directly from html form, some empty spaces will be added at end of line and result will be incorrect. @xtof pernaud solution does not suffer from this inconvenience.
– boleto
Jun 24 '13 at 18:07
add a comment |
up vote
2
down vote
Code for GNU sed:
sed -r 'sµ(.*):.*$µ\§1§dµ' file2 |sed -f - file1
Because of the many 'ugly' characters the code is for info only, do not use in production.
add a comment |
up vote
1
down vote
I suppose you mean awk
. It's possible to do it, but it will eat 2 times the files sizes of memory:
# Feed file1 followed by file2 to awk as one stream. The awk program stores
# the first 38 characters of every input line in str[] (indexed by NR) and
# counts in ex[] how many input lines share that 38-character prefix. In the
# END block it walks str[] in input order and prints each stored prefix whose
# count is exactly 1, i.e. one that occurred on a single line of the combined
# input. Note that the 38-character prefix is printed, not the original line.
cat file1 file2 |
awk '{ s = substr($0, 1, 38); str[NR] = s; ex[s]++; }
END {
for (i = 1; i <= NR; i++) {
s = str[i];
if (ex[s] == 1)
print s;
}
}'
Output:
cf03189f5b05eb1a9658f80d7a0e9f02:_#.g}
5701aa8e2aa7e1cfd16ca4076bd1732a:@AQ1z
7a1e6090568e076c55df9dc7abf356c6:9rC@p
04046da33706518d9b15a38bcddb448e:!DFPk
add a comment |
up vote
1
down vote
In perl
perl -F: -ane'BEGIN{$f=$ARGV[0]}print if$ARGV ne$f&&!$h{$F[0]};$h{$F[0]}=1' file2 file1
add a comment |
4 Answers
4
active
oldest
votes
4 Answers
4
active
oldest
votes
active
oldest
votes
active
oldest
votes
up vote
3
down vote
accepted
awk -F: 'NR==FNR{a[$1,$2];next}!(($1,$2) in a)' excludes.txt all.txt
Note the reversal of file argument order.
An explanation:
-F: - use : as a field separator
NR==FNR - The first file (current row number = total row number)
a[$1,$2] - Touch the array at the first two fields
next - move on to the next line, so we don't have to check the alternate NR != FNR condition
!(($1,$2) in a) - check whether the combination was seen. If not, print the line (default action)
Does not work for me(awk -W version: GNU Awk 3.1.8 Copyright © 1998, 1991-2010 Free Software Foundation.)
: outputs 7 lines..
– xtof pernod
Jun 24 '13 at 14:57
Works on mac/bsd awk, I'm checking but try replacing $1,$2
with $1":"$2
both places.
– Kevin
Jun 24 '13 at 15:00
Also works on mawk
, the default awk on ubuntu server 13.04
– Kevin
Jun 24 '13 at 15:03
Works on GNU Awk 4.1.0, API: 1.0
– Kevin
Jun 24 '13 at 15:05
script work properly, but must be careful with spaces. For example if you copy/paste lines directly from html form, some empty spaces will be added at end of line and result will be incorrect. @xtof pernaud solution does not suffer from this inconvenience.
– boleto
Jun 24 '13 at 18:07
add a comment |
up vote
3
down vote
accepted
awk -F: 'NR==FNR{a[$1,$2];next}!(($1,$2) in a)' excludes.txt all.txt
Note the reversal of file argument order.
An explanation:
-F: - use : as a field separator
NR==FNR - The first file (current row number = total row number)
a[$1,$2] - Touch the array at the first two fields
next - move on to the next line, so we don't have to check the alternate NR != FNR condition
!(($1,$2) in a) - check whether the combination was seen. If not, print the line (default action)
Does not work for me(awk -W version: GNU Awk 3.1.8 Copyright © 1998, 1991-2010 Free Software Foundation.)
: outputs 7 lines..
– xtof pernod
Jun 24 '13 at 14:57
Works on mac/bsd awk, I'm checking but try replacing $1,$2
with $1":"$2
both places.
– Kevin
Jun 24 '13 at 15:00
Also works on mawk
, the default awk on ubuntu server 13.04
– Kevin
Jun 24 '13 at 15:03
Works on GNU Awk 4.1.0, API: 1.0
– Kevin
Jun 24 '13 at 15:05
script work properly, but must be careful with spaces. For example if you copy/paste lines directly from html form, some empty spaces will be added at end of line and result will be incorrect. @xtof pernaud solution does not suffer from this inconvenience.
– boleto
Jun 24 '13 at 18:07
add a comment |
up vote
3
down vote
accepted
up vote
3
down vote
accepted
awk -F: 'NR==FNR{a[$1,$2];next}!(($1,$2) in a)' excludes.txt all.txt
Note the reversal of file argument order.
An explanation:
-F: - use : as a field separator
NR==FNR - The first file (current row number = total row number)
a[$1,$2] - Touch the array at the first two fields
next - move on to the next line, so we don't have to check the alternate NR != FNR condition
!(($1,$2) in a) - check whether the combination was seen. If not, print the line (default action)
awk -F: 'NR==FNR{a[$1,$2];next}!(($1,$2) in a)' excludes.txt all.txt
Note the reversal of file argument order.
An explanation:
-F: - use : as a field separator
NR==FNR - The first file (current row number = total row number)
a[$1,$2] - Touch the array at the first two fields
next - move on to the next line, so we don't have to check the alternate NR != FNR condition
!(($1,$2) in a) - check whether the combination was seen. If not, print the line (default action)
edited Jun 24 '13 at 14:54
answered Jun 24 '13 at 14:47
Kevin
38.2k977109
38.2k977109
Does not work for me(awk -W version: GNU Awk 3.1.8 Copyright © 1998, 1991-2010 Free Software Foundation.)
: outputs 7 lines..
– xtof pernod
Jun 24 '13 at 14:57
Works on mac/bsd awk, I'm checking but try replacing $1,$2
with $1":"$2
both places.
– Kevin
Jun 24 '13 at 15:00
Also works on mawk
, the default awk on ubuntu server 13.04
– Kevin
Jun 24 '13 at 15:03
Works on GNU Awk 4.1.0, API: 1.0
– Kevin
Jun 24 '13 at 15:05
script work properly, but must be careful with spaces. For example if you copy/paste lines directly from html form, some empty spaces will be added at end of line and result will be incorrect. @xtof pernaud solution does not suffer from this inconvenience.
– boleto
Jun 24 '13 at 18:07
add a comment |
Does not work for me(awk -W version: GNU Awk 3.1.8 Copyright © 1998, 1991-2010 Free Software Foundation.)
: outputs 7 lines..
– xtof pernod
Jun 24 '13 at 14:57
Works on mac/bsd awk, I'm checking but try replacing $1,$2
with $1":"$2
both places.
– Kevin
Jun 24 '13 at 15:00
Also works on mawk
, the default awk on ubuntu server 13.04
– Kevin
Jun 24 '13 at 15:03
Works on GNU Awk 4.1.0, API: 1.0
– Kevin
Jun 24 '13 at 15:05
script work properly, but must be careful with spaces. For example if you copy/paste lines directly from html form, some empty spaces will be added at end of line and result will be incorrect. @xtof pernaud solution does not suffer from this inconvenience.
– boleto
Jun 24 '13 at 18:07
Does not work for me
(awk -W version: GNU Awk 3.1.8 Copyright © 1998, 1991-2010 Free Software Foundation.)
: outputs 7 lines..– xtof pernod
Jun 24 '13 at 14:57
Does not work for me
(awk -W version: GNU Awk 3.1.8 Copyright © 1998, 1991-2010 Free Software Foundation.)
: outputs 7 lines..– xtof pernod
Jun 24 '13 at 14:57
Works on mac/bsd awk, I'm checking but try replacing
$1,$2
with $1":"$2
both places.– Kevin
Jun 24 '13 at 15:00
Works on mac/bsd awk, I'm checking but try replacing
$1,$2
with $1":"$2
both places.– Kevin
Jun 24 '13 at 15:00
Also works on
mawk
, the default awk on ubuntu server 13.04– Kevin
Jun 24 '13 at 15:03
Also works on
mawk
, the default awk on ubuntu server 13.04– Kevin
Jun 24 '13 at 15:03
Works on GNU Awk 4.1.0, API: 1.0
– Kevin
Jun 24 '13 at 15:05
Works on GNU Awk 4.1.0, API: 1.0
– Kevin
Jun 24 '13 at 15:05
script work properly, but must be careful with spaces. For example if you copy/paste lines directly from html form, some empty spaces will be added at end of line and result will be incorrect. @xtof pernaud solution does not suffer from this inconvenience.
– boleto
Jun 24 '13 at 18:07
script work properly, but must be careful with spaces. For example if you copy/paste lines directly from html form, some empty spaces will be added at end of line and result will be incorrect. @xtof pernaud solution does not suffer from this inconvenience.
– boleto
Jun 24 '13 at 18:07
add a comment |
up vote
2
down vote
Code for GNU sed:
sed -r 'sµ(.*):.*$µ\§1§dµ' file2 |sed -f - file1
Because of the many 'ugly' characters the code is for info only, do not use in production.
add a comment |
up vote
2
down vote
Code for GNU sed:
sed -r 'sµ(.*):.*$µ\§1§dµ' file2 |sed -f - file1
Because of the many 'ugly' characters the code is for info only, do not use in production.
add a comment |
up vote
2
down vote
up vote
2
down vote
Code for GNU sed:
sed -r 'sµ(.*):.*$µ\§1§dµ' file2 |sed -f - file1
Because of the many 'ugly' characters the code is for info only, do not use in production.
Code for GNU sed:
sed -r 'sµ(.*):.*$µ\§1§dµ' file2 |sed -f - file1
Because of the many 'ugly' characters the code is for info only, do not use in production.
answered Jun 24 '13 at 18:39
captcha
3,598720
3,598720
add a comment |
add a comment |
up vote
1
down vote
I suppose you mean awk
. It's possible to do it, but it will eat 2 times the files sizes of memory:
# Feed file1 followed by file2 to awk as one stream. The awk program stores
# the first 38 characters of every input line in str[] (indexed by NR) and
# counts in ex[] how many input lines share that 38-character prefix. In the
# END block it walks str[] in input order and prints each stored prefix whose
# count is exactly 1, i.e. one that occurred on a single line of the combined
# input. Note that the 38-character prefix is printed, not the original line.
cat file1 file2 |
awk '{ s = substr($0, 1, 38); str[NR] = s; ex[s]++; }
END {
for (i = 1; i <= NR; i++) {
s = str[i];
if (ex[s] == 1)
print s;
}
}'
Output:
cf03189f5b05eb1a9658f80d7a0e9f02:_#.g}
5701aa8e2aa7e1cfd16ca4076bd1732a:@AQ1z
7a1e6090568e076c55df9dc7abf356c6:9rC@p
04046da33706518d9b15a38bcddb448e:!DFPk
add a comment |
up vote
1
down vote
I suppose you mean awk
. It's possible to do it, but it will eat 2 times the files sizes of memory:
# Feed file1 followed by file2 to awk as one stream. The awk program stores
# the first 38 characters of every input line in str[] (indexed by NR) and
# counts in ex[] how many input lines share that 38-character prefix. In the
# END block it walks str[] in input order and prints each stored prefix whose
# count is exactly 1, i.e. one that occurred on a single line of the combined
# input. Note that the 38-character prefix is printed, not the original line.
cat file1 file2 |
awk '{ s = substr($0, 1, 38); str[NR] = s; ex[s]++; }
END {
for (i = 1; i <= NR; i++) {
s = str[i];
if (ex[s] == 1)
print s;
}
}'
Output:
cf03189f5b05eb1a9658f80d7a0e9f02:_#.g}
5701aa8e2aa7e1cfd16ca4076bd1732a:@AQ1z
7a1e6090568e076c55df9dc7abf356c6:9rC@p
04046da33706518d9b15a38bcddb448e:!DFPk
add a comment |
up vote
1
down vote
up vote
1
down vote
I suppose you mean awk
. It's possible to do it, but it will eat 2 times the files sizes of memory:
# Feed file1 followed by file2 to awk as one stream. The awk program stores
# the first 38 characters of every input line in str[] (indexed by NR) and
# counts in ex[] how many input lines share that 38-character prefix. In the
# END block it walks str[] in input order and prints each stored prefix whose
# count is exactly 1, i.e. one that occurred on a single line of the combined
# input. Note that the 38-character prefix is printed, not the original line.
cat file1 file2 |
awk '{ s = substr($0, 1, 38); str[NR] = s; ex[s]++; }
END {
for (i = 1; i <= NR; i++) {
s = str[i];
if (ex[s] == 1)
print s;
}
}'
Output:
cf03189f5b05eb1a9658f80d7a0e9f02:_#.g}
5701aa8e2aa7e1cfd16ca4076bd1732a:@AQ1z
7a1e6090568e076c55df9dc7abf356c6:9rC@p
04046da33706518d9b15a38bcddb448e:!DFPk
I suppose you mean awk
. It's possible to do it, but it will eat 2 times the files sizes of memory:
# Feed file1 followed by file2 to awk as one stream. The awk program stores
# the first 38 characters of every input line in str[] (indexed by NR) and
# counts in ex[] how many input lines share that 38-character prefix. In the
# END block it walks str[] in input order and prints each stored prefix whose
# count is exactly 1, i.e. one that occurred on a single line of the combined
# input. Note that the 38-character prefix is printed, not the original line.
cat file1 file2 |
awk '{ s = substr($0, 1, 38); str[NR] = s; ex[s]++; }
END {
for (i = 1; i <= NR; i++) {
s = str[i];
if (ex[s] == 1)
print s;
}
}'
Output:
cf03189f5b05eb1a9658f80d7a0e9f02:_#.g}
5701aa8e2aa7e1cfd16ca4076bd1732a:@AQ1z
7a1e6090568e076c55df9dc7abf356c6:9rC@p
04046da33706518d9b15a38bcddb448e:!DFPk
edited Jun 24 '13 at 19:44
answered Jun 24 '13 at 14:49
xtof pernod
81757
81757
add a comment |
add a comment |
up vote
1
down vote
In perl
perl -F: -ane'BEGIN{$f=$ARGV[0]}print if$ARGV ne$f&&!$h{$F[0]};$h{$F[0]}=1' file2 file1
add a comment |
up vote
1
down vote
In perl
perl -F: -ane'BEGIN{$f=$ARGV[0]}print if$ARGV ne$f&&!$h{$F[0]};$h{$F[0]}=1' file2 file1
add a comment |
up vote
1
down vote
up vote
1
down vote
In perl
perl -F: -ane'BEGIN{$f=$ARGV[0]}print if$ARGV ne$f&&!$h{$F[0]};$h{$F[0]}=1' file2 file1
In perl
perl -F: -ane'BEGIN{$f=$ARGV[0]}print if$ARGV ne$f&&!$h{$F[0]};$h{$F[0]}=1' file2 file1
answered Aug 11 '13 at 7:14
Hynek -Pichi- Vychodil
23.4k54166
23.4k54166
add a comment |
add a comment |
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f17277972%2fsorting-hash-values-by-length-of-lines-in-a-text-files%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
1
Look at the
comm
or join
standard unix utilities.– Stephane Chazelas
Jun 24 '13 at 14:32
join -v1 all <(cut -d: -f1,2 excludes)
– Kevin
Jun 24 '13 at 15:10