Sorting hash values by length of lines in a text files











up vote
1
down vote

favorite












Read each line of file1; if that line does not exist in file2, write it to a new file.

Lines are compared only over the length of the hash string (the first 38 characters), so any extra text at the end of a line in file2 is ignored.



Hash values in file1:



cf03189f5b05eb1a9658f80d7a0e9f02:_#.g}
edbe6de8b3ee19b45e092147f57af7b8:]mNon
47253940f843f258ffd265d13f365d70:/u'yv
5701aa8e2aa7e1cfd16ca4076bd1732a:@AQ1z
b3c0866e6fd56776bc4a18d3c87cc725:t$5OV
7a1e6090568e076c55df9dc7abf356c6:9rC@p
04046da33706518d9b15a38bcddb448e:!DFPk


Hash values in file2:



edbe6de8b3ee19b45e092147f57af7b8:]mNon:str1
47253940f843f258ffd265d13f365d70:/u'yv:2str
b3c0866e6fd56776bc4a18d3c87cc725:t$5OV:3str1ng


This is a working example written in C:



#include <stdio.h>
#include <string.h>

#define LINE_LENGTH 80
/* Number of leading characters that identify a line; in the sample data this
   is the 32-digit hash, the ':' separator and the 5-character suffix. */
#define HASH_KEY_LENGTH 38
typedef enum {FALSE=0,TRUE=1} BOOL;

/*
 * Copies to <OutFile> every line of <file1> whose first HASH_KEY_LENGTH
 * characters match no line of <file2>.
 *
 * Usage:   prog <file1> <file2> <OutFile>
 * Returns: 0 on success; 1 on a usage error, when any of the three files
 *          cannot be opened, or when the output file cannot be closed cleanly.
 *
 * Lines are read with fgets() into LINE_LENGTH-byte buffers, so a longer line
 * is processed in several pieces.
 */
int main(int argc, char *argv[])
{
    FILE *fpin1 = NULL;
    FILE *fpin2 = NULL;
    FILE *fpout = NULL;
    char line [LINE_LENGTH] = {0};
    char line1[LINE_LENGTH] = {0};
    BOOL bFound = FALSE;
    int rc = 1;

    if (argc != 4)
    {
        fprintf(stderr, "Usage:%s <file1> <file2> <OutFile>\n", argv[0]);
        return 1;
    }

    /*
     * Open both input files for reading in text mode and the output file for
     * writing. The || chain stops at the first failure, so the later handles
     * may still be NULL; the cleanup code below closes only what was opened.
     */
    if (((fpin1 = fopen(argv[1], "r")) == NULL) ||
        ((fpin2 = fopen(argv[2], "r")) == NULL) ||
        ((fpout = fopen(argv[3], "w")) == NULL))
    {
        fprintf(stderr, "Error! Could not open files\n");
        goto cleanup;
    }

    while (fgets(line, sizeof(line), fpin1) != NULL) /* next line of file1 */
    {
        bFound = FALSE;
        rewind(fpin2); /* scan file2 from its beginning for every file1 line */
        while (fgets(line1, sizeof(line1), fpin2) != NULL)
        {
            /* Only the key prefix is compared; trailing text is ignored. */
            if (strncmp(line, line1, HASH_KEY_LENGTH) == 0)
            {
                bFound = TRUE;
                break;
            }
        }
        if (!bFound)
        {
            fputs(line, fpout); /* not present in file2: keep the line */
        }
    }
    printf("\nDone...\n");
    rc = 0;

cleanup:
    if (fpin1 != NULL)
    {
        fclose(fpin1);
    }
    if (fpin2 != NULL)
    {
        fclose(fpin2);
    }
    /* fclose() flushes buffered output, so a failure here means lost data. */
    if (fpout != NULL && fclose(fpout) != 0)
    {
        fprintf(stderr, "Error! Could not write output file\n");
        rc = 1;
    }
    return rc;
}


The OutFile from the program is:



cf03189f5b05eb1a9658f80d7a0e9f02:_#.g}
5701aa8e2aa7e1cfd16ca4076bd1732a:@AQ1z
7a1e6090568e076c55df9dc7abf356c6:9rC@p
04046da33706518d9b15a38bcddb448e:!DFPk





  • I want to know how to write it in awk?










share|improve this question




















  • 1




    Look at the comm or join standard unix utilities.
    – Stephane Chazelas
    Jun 24 '13 at 14:32












  • join -v1 all <(cut -d: -f1,2 excludes)
    – Kevin
    Jun 24 '13 at 15:10















up vote
1
down vote

favorite












Read line in file1, if line not exist in file2 write this line to the new file .

Comparison is made by the length of the hash string .



Hash values ​​in file1:



cf03189f5b05eb1a9658f80d7a0e9f02:_#.g}
edbe6de8b3ee19b45e092147f57af7b8:]mNon
47253940f843f258ffd265d13f365d70:/u'yv
5701aa8e2aa7e1cfd16ca4076bd1732a:@AQ1z
b3c0866e6fd56776bc4a18d3c87cc725:t$5OV
7a1e6090568e076c55df9dc7abf356c6:9rC@p
04046da33706518d9b15a38bcddb448e:!DFPk


Hash values ​​in file2:



edbe6de8b3ee19b45e092147f57af7b8:]mNon:str1
47253940f843f258ffd265d13f365d70:/u'yv:2str
b3c0866e6fd56776bc4a18d3c87cc725:t$5OV:3str1ng


This is a working example written in C:



#include <stdio.h>
#include <string.h>

#define LINE_LENGTH 80
/* Number of leading characters that identify a line; in the sample data this
   is the 32-digit hash, the ':' separator and the 5-character suffix. */
#define HASH_KEY_LENGTH 38
typedef enum {FALSE=0,TRUE=1} BOOL;

/*
 * Copies to <OutFile> every line of <file1> whose first HASH_KEY_LENGTH
 * characters match no line of <file2>.
 *
 * Usage:   prog <file1> <file2> <OutFile>
 * Returns: 0 on success; 1 on a usage error, when any of the three files
 *          cannot be opened, or when the output file cannot be closed cleanly.
 *
 * Lines are read with fgets() into LINE_LENGTH-byte buffers, so a longer line
 * is processed in several pieces.
 */
int main(int argc, char *argv[])
{
    FILE *fpin1 = NULL;
    FILE *fpin2 = NULL;
    FILE *fpout = NULL;
    char line [LINE_LENGTH] = {0};
    char line1[LINE_LENGTH] = {0};
    BOOL bFound = FALSE;
    int rc = 1;

    if (argc != 4)
    {
        fprintf(stderr, "Usage:%s <file1> <file2> <OutFile>\n", argv[0]);
        return 1;
    }

    /*
     * Open both input files for reading in text mode and the output file for
     * writing. The || chain stops at the first failure, so the later handles
     * may still be NULL; the cleanup code below closes only what was opened.
     */
    if (((fpin1 = fopen(argv[1], "r")) == NULL) ||
        ((fpin2 = fopen(argv[2], "r")) == NULL) ||
        ((fpout = fopen(argv[3], "w")) == NULL))
    {
        fprintf(stderr, "Error! Could not open files\n");
        goto cleanup;
    }

    while (fgets(line, sizeof(line), fpin1) != NULL) /* next line of file1 */
    {
        bFound = FALSE;
        rewind(fpin2); /* scan file2 from its beginning for every file1 line */
        while (fgets(line1, sizeof(line1), fpin2) != NULL)
        {
            /* Only the key prefix is compared; trailing text is ignored. */
            if (strncmp(line, line1, HASH_KEY_LENGTH) == 0)
            {
                bFound = TRUE;
                break;
            }
        }
        if (!bFound)
        {
            fputs(line, fpout); /* not present in file2: keep the line */
        }
    }
    printf("\nDone...\n");
    rc = 0;

cleanup:
    if (fpin1 != NULL)
    {
        fclose(fpin1);
    }
    if (fpin2 != NULL)
    {
        fclose(fpin2);
    }
    /* fclose() flushes buffered output, so a failure here means lost data. */
    if (fpout != NULL && fclose(fpout) != 0)
    {
        fprintf(stderr, "Error! Could not write output file\n");
        rc = 1;
    }
    return rc;
}


The OutFile from the program is:



cf03189f5b05eb1a9658f80d7a0e9f02:_#.g}
5701aa8e2aa7e1cfd16ca4076bd1732a:@AQ1z
7a1e6090568e076c55df9dc7abf356c6:9rC@p
04046da33706518d9b15a38bcddb448e:!DFPk





  • I want to know how to write it in awk?










share|improve this question




















  • 1




    Look at the comm or join standard unix utilities.
    – Stephane Chazelas
    Jun 24 '13 at 14:32












  • join -v1 all <(cut -d: -f1,2 excludes)
    – Kevin
    Jun 24 '13 at 15:10













up vote
1
down vote

favorite









up vote
1
down vote

favorite











Read line in file1, if line not exist in file2 write this line to the new file .

Comparison is made by the length of the hash string .



Hash values ​​in file1:



cf03189f5b05eb1a9658f80d7a0e9f02:_#.g}
edbe6de8b3ee19b45e092147f57af7b8:]mNon
47253940f843f258ffd265d13f365d70:/u'yv
5701aa8e2aa7e1cfd16ca4076bd1732a:@AQ1z
b3c0866e6fd56776bc4a18d3c87cc725:t$5OV
7a1e6090568e076c55df9dc7abf356c6:9rC@p
04046da33706518d9b15a38bcddb448e:!DFPk


Hash values ​​in file2:



edbe6de8b3ee19b45e092147f57af7b8:]mNon:str1
47253940f843f258ffd265d13f365d70:/u'yv:2str
b3c0866e6fd56776bc4a18d3c87cc725:t$5OV:3str1ng


This is a working example written in C:



#include <stdio.h>
#include <string.h>

#define LINE_LENGTH 80
/* Number of leading characters that identify a line; in the sample data this
   is the 32-digit hash, the ':' separator and the 5-character suffix. */
#define HASH_KEY_LENGTH 38
typedef enum {FALSE=0,TRUE=1} BOOL;

/*
 * Copies to <OutFile> every line of <file1> whose first HASH_KEY_LENGTH
 * characters match no line of <file2>.
 *
 * Usage:   prog <file1> <file2> <OutFile>
 * Returns: 0 on success; 1 on a usage error, when any of the three files
 *          cannot be opened, or when the output file cannot be closed cleanly.
 *
 * Lines are read with fgets() into LINE_LENGTH-byte buffers, so a longer line
 * is processed in several pieces.
 */
int main(int argc, char *argv[])
{
    FILE *fpin1 = NULL;
    FILE *fpin2 = NULL;
    FILE *fpout = NULL;
    char line [LINE_LENGTH] = {0};
    char line1[LINE_LENGTH] = {0};
    BOOL bFound = FALSE;
    int rc = 1;

    if (argc != 4)
    {
        fprintf(stderr, "Usage:%s <file1> <file2> <OutFile>\n", argv[0]);
        return 1;
    }

    /*
     * Open both input files for reading in text mode and the output file for
     * writing. The || chain stops at the first failure, so the later handles
     * may still be NULL; the cleanup code below closes only what was opened.
     */
    if (((fpin1 = fopen(argv[1], "r")) == NULL) ||
        ((fpin2 = fopen(argv[2], "r")) == NULL) ||
        ((fpout = fopen(argv[3], "w")) == NULL))
    {
        fprintf(stderr, "Error! Could not open files\n");
        goto cleanup;
    }

    while (fgets(line, sizeof(line), fpin1) != NULL) /* next line of file1 */
    {
        bFound = FALSE;
        rewind(fpin2); /* scan file2 from its beginning for every file1 line */
        while (fgets(line1, sizeof(line1), fpin2) != NULL)
        {
            /* Only the key prefix is compared; trailing text is ignored. */
            if (strncmp(line, line1, HASH_KEY_LENGTH) == 0)
            {
                bFound = TRUE;
                break;
            }
        }
        if (!bFound)
        {
            fputs(line, fpout); /* not present in file2: keep the line */
        }
    }
    printf("\nDone...\n");
    rc = 0;

cleanup:
    if (fpin1 != NULL)
    {
        fclose(fpin1);
    }
    if (fpin2 != NULL)
    {
        fclose(fpin2);
    }
    /* fclose() flushes buffered output, so a failure here means lost data. */
    if (fpout != NULL && fclose(fpout) != 0)
    {
        fprintf(stderr, "Error! Could not write output file\n");
        rc = 1;
    }
    return rc;
}


The OutFile from the program is:



cf03189f5b05eb1a9658f80d7a0e9f02:_#.g}
5701aa8e2aa7e1cfd16ca4076bd1732a:@AQ1z
7a1e6090568e076c55df9dc7abf356c6:9rC@p
04046da33706518d9b15a38bcddb448e:!DFPk





  • I want to know how to write it in awk?










share|improve this question















Read line in file1, if line not exist in file2 write this line to the new file .

Comparison is made by the length of the hash string .



Hash values ​​in file1:



cf03189f5b05eb1a9658f80d7a0e9f02:_#.g}
edbe6de8b3ee19b45e092147f57af7b8:]mNon
47253940f843f258ffd265d13f365d70:/u'yv
5701aa8e2aa7e1cfd16ca4076bd1732a:@AQ1z
b3c0866e6fd56776bc4a18d3c87cc725:t$5OV
7a1e6090568e076c55df9dc7abf356c6:9rC@p
04046da33706518d9b15a38bcddb448e:!DFPk


Hash values ​​in file2:



edbe6de8b3ee19b45e092147f57af7b8:]mNon:str1
47253940f843f258ffd265d13f365d70:/u'yv:2str
b3c0866e6fd56776bc4a18d3c87cc725:t$5OV:3str1ng


This is a working example written in C:



#include <stdio.h>
#include <string.h>

#define LINE_LENGTH 80
/* Number of leading characters that identify a line; in the sample data this
   is the 32-digit hash, the ':' separator and the 5-character suffix. */
#define HASH_KEY_LENGTH 38
typedef enum {FALSE=0,TRUE=1} BOOL;

/*
 * Copies to <OutFile> every line of <file1> whose first HASH_KEY_LENGTH
 * characters match no line of <file2>.
 *
 * Usage:   prog <file1> <file2> <OutFile>
 * Returns: 0 on success; 1 on a usage error, when any of the three files
 *          cannot be opened, or when the output file cannot be closed cleanly.
 *
 * Lines are read with fgets() into LINE_LENGTH-byte buffers, so a longer line
 * is processed in several pieces.
 */
int main(int argc, char *argv[])
{
    FILE *fpin1 = NULL;
    FILE *fpin2 = NULL;
    FILE *fpout = NULL;
    char line [LINE_LENGTH] = {0};
    char line1[LINE_LENGTH] = {0};
    BOOL bFound = FALSE;
    int rc = 1;

    if (argc != 4)
    {
        fprintf(stderr, "Usage:%s <file1> <file2> <OutFile>\n", argv[0]);
        return 1;
    }

    /*
     * Open both input files for reading in text mode and the output file for
     * writing. The || chain stops at the first failure, so the later handles
     * may still be NULL; the cleanup code below closes only what was opened.
     */
    if (((fpin1 = fopen(argv[1], "r")) == NULL) ||
        ((fpin2 = fopen(argv[2], "r")) == NULL) ||
        ((fpout = fopen(argv[3], "w")) == NULL))
    {
        fprintf(stderr, "Error! Could not open files\n");
        goto cleanup;
    }

    while (fgets(line, sizeof(line), fpin1) != NULL) /* next line of file1 */
    {
        bFound = FALSE;
        rewind(fpin2); /* scan file2 from its beginning for every file1 line */
        while (fgets(line1, sizeof(line1), fpin2) != NULL)
        {
            /* Only the key prefix is compared; trailing text is ignored. */
            if (strncmp(line, line1, HASH_KEY_LENGTH) == 0)
            {
                bFound = TRUE;
                break;
            }
        }
        if (!bFound)
        {
            fputs(line, fpout); /* not present in file2: keep the line */
        }
    }
    printf("\nDone...\n");
    rc = 0;

cleanup:
    if (fpin1 != NULL)
    {
        fclose(fpin1);
    }
    if (fpin2 != NULL)
    {
        fclose(fpin2);
    }
    /* fclose() flushes buffered output, so a failure here means lost data. */
    if (fpout != NULL && fclose(fpout) != 0)
    {
        fprintf(stderr, "Error! Could not write output file\n");
        rc = 1;
    }
    return rc;
}


The OutFile from the program is:



cf03189f5b05eb1a9658f80d7a0e9f02:_#.g}
5701aa8e2aa7e1cfd16ca4076bd1732a:@AQ1z
7a1e6090568e076c55df9dc7abf356c6:9rC@p
04046da33706518d9b15a38bcddb448e:!DFPk





  • I want to know how to write it in awk?







regex perl sed awk pattern-matching






share|improve this question















share|improve this question













share|improve this question




share|improve this question








edited Nov 11 at 7:02









Cœur

17k9102140




17k9102140










asked Jun 24 '13 at 14:29









boleto

7611627




7611627








  • 1




    Look at the comm or join standard unix utilities.
    – Stephane Chazelas
    Jun 24 '13 at 14:32












  • join -v1 all <(cut -d: -f1,2 excludes)
    – Kevin
    Jun 24 '13 at 15:10














  • 1




    Look at the comm or join standard unix utilities.
    – Stephane Chazelas
    Jun 24 '13 at 14:32












  • join -v1 all <(cut -d: -f1,2 excludes)
    – Kevin
    Jun 24 '13 at 15:10








1




1




Look at the comm or join standard unix utilities.
– Stephane Chazelas
Jun 24 '13 at 14:32






Look at the comm or join standard unix utilities.
– Stephane Chazelas
Jun 24 '13 at 14:32














join -v1 all <(cut -d: -f1,2 excludes)
– Kevin
Jun 24 '13 at 15:10




join -v1 all <(cut -d: -f1,2 excludes)
– Kevin
Jun 24 '13 at 15:10












4 Answers
4






active

oldest

votes

















up vote
3
down vote



accepted










awk -F: 'NR==FNR{a[$1,$2];next}!(($1,$2) in a)' excludes.txt all.txt


Note the reversal of file argument order.



An explanation:
-F: - use : as a field separator
NR==FNR - The first file (current row number = total row number)
a[$1,$2] - Touch the array at the first two fields
next - move on to the next line, so we don't have to check the alternate NR != FNR condition
!(($1,$2) in a) - check whether the combination was seen. If not, print the line (default action)






share|improve this answer























  • Does not work for me (awk -W version: GNU Awk 3.1.8 Copyright © 1998, 1991-2010 Free Software Foundation.) : outputs 7 lines..
    – xtof pernod
    Jun 24 '13 at 14:57












  • Works on mac/bsd awk, I'm checking but try replacing $1,$2 with $1":"$2 both places.
    – Kevin
    Jun 24 '13 at 15:00










  • Also works on mawk, the default awk on ubuntu server 13.04
    – Kevin
    Jun 24 '13 at 15:03










  • Works on GNU Awk 4.1.0, API: 1.0
    – Kevin
    Jun 24 '13 at 15:05










  • The script works properly, but you must be careful with spaces. For example, if you copy/paste lines directly from an HTML form, extra spaces will be added at the end of each line and the result will be incorrect. @xtof pernod's solution does not suffer from this problem.
    – boleto
    Jun 24 '13 at 18:07




















up vote
2
down vote













Code for GNU sed:



sed -r 'sµ(.*):.*$µ\§1§dµ' file2 |sed -f - file1


Because of the many 'ugly' characters the code is for info only, do not use in production.






share|improve this answer




























    up vote
    1
    down vote













    I suppose you mean awk. It's possible to do it, but it will use about twice the size of the files in memory:



    cat file1 file2 |
    awk '{ s = substr($0, 1, 38); str[NR] = s; ex[s]++; }
    END {
    for (i = 1; i <= NR; i++) {
    s = str[i];
    if (ex[s] == 1)
    print s;
    }
    }'


    Output:



    cf03189f5b05eb1a9658f80d7a0e9f02:_#.g}
    5701aa8e2aa7e1cfd16ca4076bd1732a:@AQ1z
    7a1e6090568e076c55df9dc7abf356c6:9rC@p
    04046da33706518d9b15a38bcddb448e:!DFPk





    share|improve this answer






























      up vote
      1
      down vote













      In perl



      perl -F: -ane'BEGIN{$f=$ARGV[0]}print if$ARGV ne$f&&!$h{$F[0]};$h{$F[0]}=1' file2 file1





      share|improve this answer





















        Your Answer






        // Page bootstrap for the answer editor. `StackExchange` and
        // `initTagRenderer` are globals supplied by the hosting page; they are
        // not defined in this snippet.

        // Calls snippets.init() once the "editor", "externalEditor" and
        // "snippets" modules have been requested in that nested order.
        StackExchange.ifUsing("editor", function () {
            StackExchange.using("externalEditor", function () {
                StackExchange.using("snippets", function () {
                    StackExchange.snippets.init();
                });
            });
        }, "code-snippets");

        StackExchange.ready(function () {
            var channelOptions = {
                tags: "".split(" "),
                id: "1"
            };
            initTagRenderer("".split(" "), "".split(" "), channelOptions);

            StackExchange.using("externalEditor", function () {
                // Have to fire editor after snippets, if snippets enabled
                if (StackExchange.settings.snippets.snippetsEnabled) {
                    StackExchange.using("snippets", function () {
                        createEditor();
                    });
                } else {
                    createEditor();
                }
            });

            // Function declaration, so it is hoisted and callable from the
            // callbacks registered above. Passes the answer-editor options to
            // StackExchange.prepareEditor.
            function createEditor() {
                StackExchange.prepareEditor({
                    heartbeatType: 'answer',
                    convertImagesToLinks: true,
                    noModals: true,
                    showLowRepImageUploadWarning: true,
                    reputationToPostImages: 10,
                    bindNavPrevention: true,
                    postfix: "",
                    imageUploader: {
                        // The \u003c / \u003e escapes are '<' and '>'; inner
                        // double quotes must be escaped to keep the literal valid.
                        brandingHtml: "Powered by \u003ca class=\"icon-imgur-white\" href=\"https://imgur.com/\"\u003e\u003c/a\u003e",
                        contentPolicyHtml: "User contributions licensed under \u003ca href=\"https://creativecommons.org/licenses/by-sa/3.0/\"\u003ecc by-sa 3.0 with attribution required\u003c/a\u003e \u003ca href=\"https://stackoverflow.com/legal/content-policy\"\u003e(content policy)\u003c/a\u003e",
                        allowUrls: true
                    },
                    onDemand: true,
                    discardSelector: ".discard-answer",
                    immediatelyShowMarkdownHelp: true
                });
            }
        });














         

        draft saved


        draft discarded


















        // Registers a ready-callback that calls StackExchange.openid.initPostLogin
        // with three literals: a CSS-style selector, the URL-encoded address of
        // this question's "#new-answer" anchor, and the string 'question_page'.
        // NOTE(review): the meaning of each argument is defined by the page's
        // StackExchange library, which is not visible here.
        StackExchange.ready(
        function () {
        StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f17277972%2fsorting-hash-values-by-length-of-lines-in-a-text-files%23new-answer', 'question_page');
        }
        );

        Post as a guest















        Required, but never shown

























        4 Answers
        4






        active

        oldest

        votes








        4 Answers
        4






        active

        oldest

        votes









        active

        oldest

        votes






        active

        oldest

        votes








        up vote
        3
        down vote



        accepted










        awk -F: 'NR==FNR{a[$1,$2];next}!(($1,$2) in a)' excludes.txt all.txt


        Note the reversal of file argument order.



        An explanation:
        -F: - use : as a field separator
        NR==FNR - The first file (current row number = total row number)
        a[$1,$2] - Touch the array at the first two fields
        next - move on to the next line, so we don't have to check the alternate NR != FNR condition
        !(($1,$2) in a) - check whether the combination was seen. If not, print the line (default action)






        share|improve this answer























        • Does not work for me (awk -W version: GNU Awk 3.1.8 Copyright © 1998, 1991-2010 Free Software Foundation.) : outputs 7 lines..
          – xtof pernod
          Jun 24 '13 at 14:57












        • Works on mac/bsd awk, I'm checking but try replacing $1,$2 with $1":"$2 both places.
          – Kevin
          Jun 24 '13 at 15:00










        • Also works on mawk, the default awk on ubuntu server 13.04
          – Kevin
          Jun 24 '13 at 15:03










        • Works on GNU Awk 4.1.0, API: 1.0
          – Kevin
          Jun 24 '13 at 15:05










        • script work properly, but must be careful with spaces. For example if you copy/paste lines directly from html form, some empty spaces will be added at end of line and result will be incorrect. @xtof pernaud solution does not suffer from this inconvenience.
          – boleto
          Jun 24 '13 at 18:07

















        up vote
        3
        down vote



        accepted










        awk -F: 'NR==FNR{a[$1,$2];next}!(($1,$2) in a)' excludes.txt all.txt


        Note the reversal of file argument order.



        An explanation:
        -F: - use : as a field separator
        NR==FNR - The first file (current row number = total row number)
        a[$1,$2] - Touch the array at the first two fields
        next - move on to the next line, so we don't have to check the alternate NR != FNR condition
        !(($1,$2) in a) - check whether the combination was seen. If not, print the line (default action)






        share|improve this answer























        • Does not work for me (awk -W version: GNU Awk 3.1.8 Copyright © 1998, 1991-2010 Free Software Foundation.) : outputs 7 lines..
          – xtof pernod
          Jun 24 '13 at 14:57












        • Works on mac/bsd awk, I'm checking but try replacing $1,$2 with $1":"$2 both places.
          – Kevin
          Jun 24 '13 at 15:00










        • Also works on mawk, the default awk on ubuntu server 13.04
          – Kevin
          Jun 24 '13 at 15:03










        • Works on GNU Awk 4.1.0, API: 1.0
          – Kevin
          Jun 24 '13 at 15:05










        • script work properly, but must be careful with spaces. For example if you copy/paste lines directly from html form, some empty spaces will be added at end of line and result will be incorrect. @xtof pernaud solution does not suffer from this inconvenience.
          – boleto
          Jun 24 '13 at 18:07















        up vote
        3
        down vote



        accepted







        up vote
        3
        down vote



        accepted






        awk -F: 'NR==FNR{a[$1,$2];next}!(($1,$2) in a)' excludes.txt all.txt


        Note the reversal of file argument order.



        An explanation:
        -F: - use : as a field separator
        NR==FNR - The first file (current row number = total row number)
        a[$1,$2] - Touch the array at the first two fields
        next - move on to the next line, so we don't have to check the alternate NR != FNR condition
        !(($1,$2) in a) - check whether the combination was seen. If not, print the line (default action)






        share|improve this answer














        awk -F: 'NR==FNR{a[$1,$2];next}!(($1,$2) in a)' excludes.txt all.txt


        Note the reversal of file argument order.



        An explanation:
        -F: - use : as a field separator
        NR==FNR - The first file (current row number = total row number)
        a[$1,$2] - Touch the array at the first two fields
        next - move on to the next line, so we don't have to check the alternate NR != FNR condition
        !(($1,$2) in a) - check whether the combination was seen. If not, print the line (default action)







        share|improve this answer














        share|improve this answer



        share|improve this answer








        edited Jun 24 '13 at 14:54

























        answered Jun 24 '13 at 14:47









        Kevin

        38.2k977109




        38.2k977109












        • Does not work for me (awk -W version: GNU Awk 3.1.8 Copyright © 1998, 1991-2010 Free Software Foundation.) : outputs 7 lines..
          – xtof pernod
          Jun 24 '13 at 14:57












        • Works on mac/bsd awk, I'm checking but try replacing $1,$2 with $1":"$2 both places.
          – Kevin
          Jun 24 '13 at 15:00










        • Also works on mawk, the default awk on ubuntu server 13.04
          – Kevin
          Jun 24 '13 at 15:03










        • Works on GNU Awk 4.1.0, API: 1.0
          – Kevin
          Jun 24 '13 at 15:05










        • script work properly, but must be careful with spaces. For example if you copy/paste lines directly from html form, some empty spaces will be added at end of line and result will be incorrect. @xtof pernaud solution does not suffer from this inconvenience.
          – boleto
          Jun 24 '13 at 18:07




















        • Does not work for me (awk -W version: GNU Awk 3.1.8 Copyright © 1998, 1991-2010 Free Software Foundation.) : outputs 7 lines..
          – xtof pernod
          Jun 24 '13 at 14:57












        • Works on mac/bsd awk, I'm checking but try replacing $1,$2 with $1":"$2 both places.
          – Kevin
          Jun 24 '13 at 15:00










        • Also works on mawk, the default awk on ubuntu server 13.04
          – Kevin
          Jun 24 '13 at 15:03










        • Works on GNU Awk 4.1.0, API: 1.0
          – Kevin
          Jun 24 '13 at 15:05










        • script work properly, but must be careful with spaces. For example if you copy/paste lines directly from html form, some empty spaces will be added at end of line and result will be incorrect. @xtof pernaud solution does not suffer from this inconvenience.
          – boleto
          Jun 24 '13 at 18:07


















        Does not work for me (awk -W version: GNU Awk 3.1.8 Copyright © 1998, 1991-2010 Free Software Foundation.) : outputs 7 lines..
        – xtof pernod
        Jun 24 '13 at 14:57






        Does not work for me (awk -W version: GNU Awk 3.1.8 Copyright © 1998, 1991-2010 Free Software Foundation.) : outputs 7 lines..
        – xtof pernod
        Jun 24 '13 at 14:57














        Works on mac/bsd awk, I'm checking but try replacing $1,$2 with $1":"$2 both places.
        – Kevin
        Jun 24 '13 at 15:00




        Works on mac/bsd awk, I'm checking but try replacing $1,$2 with $1":"$2 both places.
        – Kevin
        Jun 24 '13 at 15:00












        Also works on mawk, the default awk on ubuntu server 13.04
        – Kevin
        Jun 24 '13 at 15:03




        Also works on mawk, the default awk on ubuntu server 13.04
        – Kevin
        Jun 24 '13 at 15:03












        Works on GNU Awk 4.1.0, API: 1.0
        – Kevin
        Jun 24 '13 at 15:05




        Works on GNU Awk 4.1.0, API: 1.0
        – Kevin
        Jun 24 '13 at 15:05












        script work properly, but must be careful with spaces. For example if you copy/paste lines directly from html form, some empty spaces will be added at end of line and result will be incorrect. @xtof pernaud solution does not suffer from this inconvenience.
        – boleto
        Jun 24 '13 at 18:07






        script work properly, but must be careful with spaces. For example if you copy/paste lines directly from html form, some empty spaces will be added at end of line and result will be incorrect. @xtof pernaud solution does not suffer from this inconvenience.
        – boleto
        Jun 24 '13 at 18:07














        up vote
        2
        down vote













        Code for GNU sed:



        sed -r 'sµ(.*):.*$µ\§1§dµ' file2 |sed -f - file1


        Because of the many 'ugly' characters the code is for info only, do not use in production.






        share|improve this answer

























          up vote
          2
          down vote













          Code for GNU sed:



          sed -r 'sµ(.*):.*$µ\§1§dµ' file2 |sed -f - file1


          Because of the many 'ugly' characters the code is for info only, do not use in production.






          share|improve this answer























            up vote
            2
            down vote










            up vote
            2
            down vote









            Code for GNU sed:



            sed -r 'sµ(.*):.*$µ\§1§dµ' file2 |sed -f - file1


            Because of the many 'ugly' characters the code is for info only, do not use in production.






            share|improve this answer












            Code for GNU sed:



            sed -r 'sµ(.*):.*$µ\§1§dµ' file2 |sed -f - file1


            Because of the many 'ugly' characters the code is for info only, do not use in production.







            share|improve this answer












            share|improve this answer



            share|improve this answer










            answered Jun 24 '13 at 18:39









            captcha

            3,598720




            3,598720






















                up vote
                1
                down vote













                I suppose you mean awk. It's possible to do it, but it will eat 2 times the files sizes of memory:



                cat file1 file2 |
                awk '{ s = substr($0, 1, 38); str[NR] = s; ex[s]++; }
                END {
                for (i = 1; i <= NR; i++) {
                s = str[i];
                if (ex[s] == 1)
                print s;
                }
                }'


                Output:



                cf03189f5b05eb1a9658f80d7a0e9f02:_#.g}
                5701aa8e2aa7e1cfd16ca4076bd1732a:@AQ1z
                7a1e6090568e076c55df9dc7abf356c6:9rC@p
                04046da33706518d9b15a38bcddb448e:!DFPk





                share|improve this answer



























                  up vote
                  1
                  down vote













                  I suppose you mean awk. It's possible to do it, but it will eat 2 times the files sizes of memory:



                  cat file1 file2 |
                  awk '{ s = substr($0, 1, 38); str[NR] = s; ex[s]++; }
                  END {
                  for (i = 1; i <= NR; i++) {
                  s = str[i];
                  if (ex[s] == 1)
                  print s;
                  }
                  }'


                  Output:



                  cf03189f5b05eb1a9658f80d7a0e9f02:_#.g}
                  5701aa8e2aa7e1cfd16ca4076bd1732a:@AQ1z
                  7a1e6090568e076c55df9dc7abf356c6:9rC@p
                  04046da33706518d9b15a38bcddb448e:!DFPk





                  share|improve this answer

























                    up vote
                    1
                    down vote










                    up vote
                    1
                    down vote









                    I suppose you mean awk. It's possible to do it, but it will eat 2 times the files sizes of memory:



                    cat file1 file2 |
                    awk '{ s = substr($0, 1, 38); str[NR] = s; ex[s]++; }
                    END {
                    for (i = 1; i <= NR; i++) {
                    s = str[i];
                    if (ex[s] == 1)
                    print s;
                    }
                    }'


                    Output:



                    cf03189f5b05eb1a9658f80d7a0e9f02:_#.g}
                    5701aa8e2aa7e1cfd16ca4076bd1732a:@AQ1z
                    7a1e6090568e076c55df9dc7abf356c6:9rC@p
                    04046da33706518d9b15a38bcddb448e:!DFPk





                    share|improve this answer














                    I suppose you mean awk. It's possible to do it, but it will eat 2 times the files sizes of memory:



                    cat file1 file2 |
                    awk '{ s = substr($0, 1, 38); str[NR] = s; ex[s]++; }
                    END {
                    for (i = 1; i <= NR; i++) {
                    s = str[i];
                    if (ex[s] == 1)
                    print s;
                    }
                    }'


                    Output:



                    cf03189f5b05eb1a9658f80d7a0e9f02:_#.g}
                    5701aa8e2aa7e1cfd16ca4076bd1732a:@AQ1z
                    7a1e6090568e076c55df9dc7abf356c6:9rC@p
                    04046da33706518d9b15a38bcddb448e:!DFPk






                    share|improve this answer














                    share|improve this answer



                    share|improve this answer








                    edited Jun 24 '13 at 19:44

























                    answered Jun 24 '13 at 14:49









                    xtof pernod

                    81757




                    81757






















                        up vote
                        1
                        down vote













                        In perl



                        perl -F: -ane'BEGIN{$f=$ARGV[0]}print if$ARGV ne$f&&!$h{$F[0]};$h{$F[0]}=1' file2 file1





                        share|improve this answer

























                          up vote
                          1
                          down vote













                          In perl



                          perl -F: -ane'BEGIN{$f=$ARGV[0]}print if$ARGV ne$f&&!$h{$F[0]};$h{$F[0]}=1' file2 file1





                          share|improve this answer























                            up vote
                            1
                            down vote










                            up vote
                            1
                            down vote









                            In perl



                            perl -F: -ane'BEGIN{$f=$ARGV[0]}print if$ARGV ne$f&&!$h{$F[0]};$h{$F[0]}=1' file2 file1





                            share|improve this answer












                            In perl



                            perl -F: -ane'BEGIN{$f=$ARGV[0]}print if$ARGV ne$f&&!$h{$F[0]};$h{$F[0]}=1' file2 file1






                            share|improve this answer












                            share|improve this answer



                            share|improve this answer










                            answered Aug 11 '13 at 7:14









                            Hynek -Pichi- Vychodil

                            23.4k54166




                            23.4k54166






























                                 

                                draft saved


                                draft discarded



















































                                 


                                draft saved


                                draft discarded














                                // Registers a ready-callback that calls StackExchange.openid.initPostLogin
                                // with three literals: a CSS-style selector, the URL-encoded address of
                                // this question's "#new-answer" anchor, and the string 'question_page'.
                                // NOTE(review): the meaning of each argument is defined by the page's
                                // StackExchange library, which is not visible here.
                                StackExchange.ready(
                                function () {
                                StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f17277972%2fsorting-hash-values-by-length-of-lines-in-a-text-files%23new-answer', 'question_page');
                                }
                                );

                                Post as a guest















                                Required, but never shown





















































                                Required, but never shown














                                Required, but never shown












                                Required, but never shown







                                Required, but never shown

































                                Required, but never shown














                                Required, but never shown












                                Required, but never shown







                                Required, but never shown







                                Popular posts from this blog

                                Xamarin.iOS Cant Deploy on Iphone

                                Glorious Revolution

                                Dulmage-Mendelsohn matrix decomposition in Python