This program collects the functions by module in a directory full of modules and checks a code base against it. This tells you which functions you are actually using. Which isn't quite what I wanted, but close.
I can see some additions I could want. Setting the library directory and code directories via
Getopt::Long would be the first one. And it doesn't quite tell me what I want to know in all cases. If I use a function in a program that never gets called, it still gives me a result. But this is a place to start.And because of this, I now know that, within the stack of previously-invented wheels called CPAN, there's a module called Regexp::Common that holds a stack of established regular expressions. I wanted to pull out all comments out of my programs for testing, so that a commented-out function doesn't count.
#!/usr/bin/perl
use 5.010 ;
use strict ;
use warnings ;
use Cwd 'abs_path' ;
use Regexp::Common qw /comment/ ;
use subs qw{
check_programs
module_list
decomment
drop_pm
get_module_subs
pull_package_name
pull_module_name
pull_sub_name
} ;
my $modules = module_list '/path/to/my/lib' ;
my $directories = [
'/code/directory/one',
'/code/directory/two',
'/code/directory/three', ] ;
my $data = check_programs( $directories, $modules ) ;
for my $mod ( sort keys %$data ) {
my $module = $data->{ $mod } ;
say $mod ;
for my $sub ( sort keys %$module ) {
my $subroutine = $module->{ $sub } ;
say join "\t", '',
( $subroutine->{ count } ? $subroutine->{ count } : 0 ) ,
$sub,
;
}
}
exit ;
########## ######### ######### ######### ######### #########
########## ######### Subroutines ######### #########
########## ######### ######### ######### ######### #########
#--------- --------- --------- --------- --------- --------- ---------
# The core of the program
sub check_programs {
my ( $directories, $modules ) = @_ ;
my $data ;
for my $program_dir ( @$directories ) {
my $program_directory = abs_path $program_dir ;
chdir $program_directory ;
#say $program_directory ;
my @directory = glob '*.cgi *.pl *.pm' ;
my $programs ;
@$programs = map {
{ $_ => get_program( $_ ) }
} @directory ;
for my $program ( @$programs ) {
my $k ;
( $k ) = keys %$program ;
my $v = $program->{ $k } ;
#say join "\t", '', $k ;
for my $module ( @$modules ) {
my $mk ;
( $mk ) = keys %$module ;
my $mv = $module->{ $mk } ;
#say join "\t", '', '', $mk ;
for my $sub ( @$mv ) {
my $result = $v =~ /$sub/ ? 1 : 0 ;
#$result
# and say join "\t", '', '', '', $result, $sub ;
$data->{ $mk }->{ $sub }->{ exists } = 1 ;
if ( $result ) {
$data->{ $mk }->{ $sub }->{ count }++ ;
push @{ $data->{ $mk }->{ $sub }->{ used } },
abs_path $k ;
}
}
}
}
}
return $data ;
}
#--------- --------- --------- --------- --------- --------- ---------
# returns the contents of a filename, if it contains 'perl' in the top
sub get_program {
my ( $filename ) = @_ ;
if ( -f $filename ) {
if ( open my $fh, '<', $filename ) {
my @lines =
map { decomment $_ } <$fh> ;
#return 0 if $lines[0] !~ m/perl/ ;
return join '', @lines ;
close $fh ;
}
}
return 0 ;
}
#--------- --------- --------- --------- --------- --------- ---------
# removes Perl comments
sub decomment {
my ( $code ) = @_ ;
#chomp $code ;
$code =~ s/$RE{comment}{Perl}// ;
return $code ;
}
#--------- --------- --------- --------- --------- --------- ---------
# returns an array ref of module names with an array of subroutines
# the module contains
sub module_list {
my ( $dir ) = @_ ;
my $directory = abs_path $dir ;
chdir $directory ;
my $output ;
my @directory = glob '*.pm' ;
@$output = map {
{
( pull_module_name join '/', $directory, $_ . '.pm' ) =>
( get_module_subs join '/', $directory, $_ . '.pm' )
}
}
map { drop_pm $_ } @directory ;
return $output ;
}
#--------- --------- --------- --------- --------- --------- ---------
# returns an array ref to all the functions (minus internal functions
# whose name starts with _) within a given module
sub get_module_subs {
my ( $mod_path ) = @_ ;
my @output ;
if ( -f $mod_path ) {
if ( open my $fh, '<', $mod_path ) {
my @lines = <$fh> ;
push @output, grep { !/^_/ }
map { pull_sub_name $_ }
grep { /^\s*sub / } @lines ;
close $fh ;
}
}
@output = sort @output ;
return \@output ;
}
#--------- --------- --------- --------- --------- --------- ---------
# return the package name of a module
sub pull_module_name {
my ( $mod_path ) = @_ ;
my @output ;
if ( -f $mod_path ) {
if ( open my $fh, '<', $mod_path ) {
my @lines = <$fh> ;
push @output, map { pull_package_name $_ }
grep { /^\s*package / } @lines ;
close $fh ;
}
}
return $output[ 0 ] ;
}
#--------- --------- --------- --------- --------- --------- ---------
# return the package name of a 'package Package::Name ;' string
sub pull_package_name {
my ( $in ) = @_ ;
chomp $in ;
$in = ( split m{\s*package\s*}, $in )[ 1 ] ;
$in = ( split m/\s*;\s*/, $in )[ 0 ] ;
return $in ;
}
#--------- --------- --------- --------- --------- --------- ---------
# return only the subroutine name from a 'sub sub_name { ' string
sub pull_sub_name {
my ( $in ) = @_ ;
chomp $in ;
$in = ( split m{\s*sub\s*}, $in )[ 1 ] ;
$in = ( split m/\s*{\s*/, $in )[ 0 ] ;
return $in ;
}
#--------- --------- --------- --------- --------- --------- ---------
# remove '.pm' from end of module file names
sub drop_pm {
my ( $in ) = @_ ;
$in =~ s/\.pm$// ;
return $in ;
}
I'll confess that I wish I wrote it more Higher Order, but that can be changed later.
ReplyDelete