Perl Script to Splice CSV Files
If you happen to have a huge data set CSV and want to split them out into a smaller subset you can use this Perl script to splice CSV files.
You can only splice text files, and the column orders need to be the same, but it’s useful when you want to run a test on some sample code instead of running a test on millions of lines of CSV data.
If you want Custom Perl Scripting please contact us so we can give you a quote on your project.
Here is how to use a Perl script to splice CSV files.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
#!/usr/bin/perl use warnings; # standard best practices use strict; # standard best practices use Benchmark; # in case we need to optimize use Getopt::Std; # for CLI options #----------------------------------------------------------- # SCRIPT START - Do not touch below #----------------------------------------------------------- $|++; my @keeps; my $lines; my $input; my $begin; my $end; my $header; my %options; #----------------------------------------------------------- # LISTFILES: lists files in current directory #----------------------------------------------------------- sub listfiles { print "\nFiles We Found:\n"; opendir my $dir, "." or die "Cannot open directory: $!"; while ( my $node = readdir($dir) ) { next if $node =~ /^\./; print "$node\n"; } closedir $dir; } #----------------------------------------------------------- # USAGE: Educates user on how to run program, then lists files #----------------------------------------------------------- sub usage { my $msg = shift; print "$msg" . "\n\n"; print "Usage: $0" . ' -i <somefile.csv> -b <begin> -e <end> -h <0|1>' . "\n\n"; print "-i should be a csv text file\n"; print "-b is the line number you want to begin with\n"; print "-e is the line number you want to end with\n"; print "-h is a boolean 0 or 1, do you want to include header\n\n"; listfiles(); exit; } #----------------------------------------------------------- # Begin error checking and input #----------------------------------------------------------- # start timer my $t0 = new Benchmark; #-------------------------------- # Make sure at least one argument provided if ( !@ARGV ) { usage(); } # What are the options they entered getopt("ibeh", \%options); # check start line if (defined $options{b}) { $begin = $options{b}; unless ($begin >= 0) { usage("Not designed to have negative beginning") } } else { usage(); } # check stop line if (defined $options{e}) { $end = $options{e}; unless ($begin < $end) { usage("Cannot start after your file ends!") } } else { usage(); } # do we want a header (line 0)? if (defined $options{h}) { $header = $options{h}; } else { usage("Do you want to use first line as a header? 0 or 1"); } print qq~Verifying Infile......~; # test for the existence of the a text file if (defined $options{i}) { $input = $options{i}; if (-e $input) { if (-B $input) { usage("Input file is binary, should be text"); } } else { usage("Input File Doesn't Exist") } } else { usage(); } my $outfile = "$input" . "-processed.csv"; # build outfile names #----------------------------------------------------------- # Main Logic #----------------------------------------------------------- # tell them what's going on my $t1 = new Benchmark; my $td = timediff($t1,$t0); print "DONE!! took '",timestr($td), "' seconds\n"; #-------------------------------- print qq~Counting Lines........~; #----------------------------------------------------------- # READ ENTIRE FILE, LINE BY LINE, FROM BEGIN to END #----------------------------------------------------------- # open file handle ($fh) for reading (<:) open my $fh, "<:encoding(utf8)", $input or die "$!\n"; while (my $row = <$fh>) { $lines++; # keep track of how many lines total chomp $row; # strip off new line # capture header if line == 1 if (($lines == 1) && ($header)){ push @keeps, $row; } elsif ($lines == $begin) { push @keeps, $row; } elsif (($lines > $begin) && ($lines <= $end)){ push @keeps, $row; } elsif ($lines > $end) { last; } else { next; } } close $fh; # notify user of what's going on my $t2 = new Benchmark; $td = timediff($t2,$t1); print "DONE!! took '",timestr($td), "' seconds\n"; #-------------------------------- print qq~Create Output File....~; open my $fh2, ">:encoding(utf8)", $outfile or die "$!\n"; foreach my $line (@keeps) { print $fh2 "$line\n"; } close $fh2; # notify user of what's going on my $t3 = new Benchmark; $td = timediff($t3,$t2); print "DONE!! took '",timestr($td), "' seconds\n"; #-------------------------------- print "Counted '$lines' lines of data\n"; my $kept = @keeps; print "KEPT: $kept\n"; |