419 lines
		
	
	
	
		
			12 KiB
			
		
	
	
	
		
			Perl
		
	
	
	
	
	
		
		
			
		
	
	
			419 lines
		
	
	
	
		
			12 KiB
			
		
	
	
	
		
			Perl
		
	
	
	
	
	
|   | #!/usr/bin/perl | ||
|  | # This is a POC (proof of concept or piece of crap, take your pick) for reading the | ||
|  | # text representation of trace output related to page allocation. It makes an attempt | ||
|  | # to extract some high-level information on what is going on. The accuracy of the parser | ||
|  | # may vary considerably | ||
|  | # | ||
|  | # Example usage: trace-pagealloc-postprocess.pl < /sys/kernel/debug/tracing/trace_pipe | ||
|  | # other options | ||
|  | #   --prepend-parent	Report on the parent proc and PID | ||
|  | #   --read-procstat	If the trace lacks process info, get it from /proc | ||
|  | #   --ignore-pid	Aggregate processes of the same name together | ||
|  | # | ||
|  | # Copyright (c) IBM Corporation 2009 | ||
|  | # Author: Mel Gorman <mel@csn.ul.ie> | ||
|  | use strict; | ||
|  | use Getopt::Long; | ||
|  | 
 | ||
|  | # Tracepoint events | ||
|  | use constant MM_PAGE_ALLOC		=> 1; | ||
|  | use constant MM_PAGE_FREE_DIRECT 	=> 2; | ||
|  | use constant MM_PAGEVEC_FREE		=> 3; | ||
|  | use constant MM_PAGE_PCPU_DRAIN		=> 4; | ||
|  | use constant MM_PAGE_ALLOC_ZONE_LOCKED	=> 5; | ||
|  | use constant MM_PAGE_ALLOC_EXTFRAG	=> 6; | ||
|  | use constant EVENT_UNKNOWN		=> 7; | ||
|  | 
 | ||
|  | # Constants used to track state | ||
|  | use constant STATE_PCPU_PAGES_DRAINED	=> 8; | ||
|  | use constant STATE_PCPU_PAGES_REFILLED	=> 9; | ||
|  | 
 | ||
|  | # High-level events extrapolated from tracepoints | ||
|  | use constant HIGH_PCPU_DRAINS		=> 10; | ||
|  | use constant HIGH_PCPU_REFILLS		=> 11; | ||
|  | use constant HIGH_EXT_FRAGMENT		=> 12; | ||
|  | use constant HIGH_EXT_FRAGMENT_SEVERE	=> 13; | ||
|  | use constant HIGH_EXT_FRAGMENT_MODERATE	=> 14; | ||
|  | use constant HIGH_EXT_FRAGMENT_CHANGED	=> 15; | ||
|  | 
 | ||
|  | my %perprocesspid; | ||
|  | my %perprocess; | ||
|  | my $opt_ignorepid; | ||
|  | my $opt_read_procstat; | ||
|  | my $opt_prepend_parent; | ||
|  | 
 | ||
|  | # Catch sigint and exit on request | ||
|  | my $sigint_report = 0; | ||
|  | my $sigint_exit = 0; | ||
|  | my $sigint_pending = 0; | ||
|  | my $sigint_received = 0; | ||
|  | sub sigint_handler { | ||
|  | 	my $current_time = time; | ||
|  | 	if ($current_time - 2 > $sigint_received) { | ||
|  | 		print "SIGINT received, report pending. Hit ctrl-c again to exit\n"; | ||
|  | 		$sigint_report = 1; | ||
|  | 	} else { | ||
|  | 		if (!$sigint_exit) { | ||
|  | 			print "Second SIGINT received quickly, exiting\n"; | ||
|  | 		} | ||
|  | 		$sigint_exit++; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	if ($sigint_exit > 3) { | ||
|  | 		print "Many SIGINTs received, exiting now without report\n"; | ||
|  | 		exit; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	$sigint_received = $current_time; | ||
|  | 	$sigint_pending = 1; | ||
|  | } | ||
|  | $SIG{INT} = "sigint_handler"; | ||
|  | 
 | ||
|  | # Parse command line options | ||
|  | GetOptions( | ||
|  | 	'ignore-pid'	 =>	\$opt_ignorepid, | ||
|  | 	'read-procstat'	 =>	\$opt_read_procstat, | ||
|  | 	'prepend-parent' =>	\$opt_prepend_parent, | ||
|  | ); | ||
|  | 
 | ||
|  | # Defaults for dynamically discovered regex's | ||
|  | my $regex_fragdetails_default = 'page=([0-9a-f]*) pfn=([0-9]*) alloc_order=([-0-9]*) fallback_order=([-0-9]*) pageblock_order=([-0-9]*) alloc_migratetype=([-0-9]*) fallback_migratetype=([-0-9]*) fragmenting=([-0-9]) change_ownership=([-0-9])'; | ||
|  | 
 | ||
|  | # Dyanically discovered regex | ||
|  | my $regex_fragdetails; | ||
|  | 
 | ||
|  | # Static regex used. Specified like this for readability and for use with /o | ||
|  | #                      (process_pid)     (cpus      )   ( time  )   (tpoint    ) (details) | ||
|  | my $regex_traceevent = '\s*([a-zA-Z0-9-]*)\s*(\[[0-9]*\])\s*([0-9.]*):\s*([a-zA-Z_]*):\s*(.*)'; | ||
|  | my $regex_statname = '[-0-9]*\s\((.*)\).*'; | ||
|  | my $regex_statppid = '[-0-9]*\s\(.*\)\s[A-Za-z]\s([0-9]*).*'; | ||
|  | 
 | ||
|  | sub generate_traceevent_regex { | ||
|  | 	my $event = shift; | ||
|  | 	my $default = shift; | ||
|  | 	my $regex; | ||
|  | 
 | ||
|  | 	# Read the event format or use the default | ||
|  | 	if (!open (FORMAT, "/sys/kernel/debug/tracing/events/$event/format")) { | ||
|  | 		$regex = $default; | ||
|  | 	} else { | ||
|  | 		my $line; | ||
|  | 		while (!eof(FORMAT)) { | ||
|  | 			$line = <FORMAT>; | ||
|  | 			if ($line =~ /^print fmt:\s"(.*)",.*/) { | ||
|  | 				$regex = $1; | ||
|  | 				$regex =~ s/%p/\([0-9a-f]*\)/g; | ||
|  | 				$regex =~ s/%d/\([-0-9]*\)/g; | ||
|  | 				$regex =~ s/%lu/\([0-9]*\)/g; | ||
|  | 			} | ||
|  | 		} | ||
|  | 	} | ||
|  | 
 | ||
|  | 	# Verify fields are in the right order | ||
|  | 	my $tuple; | ||
|  | 	foreach $tuple (split /\s/, $regex) { | ||
|  | 		my ($key, $value) = split(/=/, $tuple); | ||
|  | 		my $expected = shift; | ||
|  | 		if ($key ne $expected) { | ||
|  | 			print("WARNING: Format not as expected '$key' != '$expected'"); | ||
|  | 			$regex =~ s/$key=\((.*)\)/$key=$1/; | ||
|  | 		} | ||
|  | 	} | ||
|  | 
 | ||
|  | 	if (defined shift) { | ||
|  | 		die("Fewer fields than expected in format"); | ||
|  | 	} | ||
|  | 
 | ||
|  | 	return $regex; | ||
|  | } | ||
|  | $regex_fragdetails = generate_traceevent_regex("kmem/mm_page_alloc_extfrag", | ||
|  | 			$regex_fragdetails_default, | ||
|  | 			"page", "pfn", | ||
|  | 			"alloc_order", "fallback_order", "pageblock_order", | ||
|  | 			"alloc_migratetype", "fallback_migratetype", | ||
|  | 			"fragmenting", "change_ownership"); | ||
|  | 
 | ||
|  | sub read_statline($) { | ||
|  | 	my $pid = $_[0]; | ||
|  | 	my $statline; | ||
|  | 
 | ||
|  | 	if (open(STAT, "/proc/$pid/stat")) { | ||
|  | 		$statline = <STAT>; | ||
|  | 		close(STAT); | ||
|  | 	} | ||
|  | 
 | ||
|  | 	if ($statline eq '') { | ||
|  | 		$statline = "-1 (UNKNOWN_PROCESS_NAME) R 0"; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	return $statline; | ||
|  | } | ||
|  | 
 | ||
|  | sub guess_process_pid($$) { | ||
|  | 	my $pid = $_[0]; | ||
|  | 	my $statline = $_[1]; | ||
|  | 
 | ||
|  | 	if ($pid == 0) { | ||
|  | 		return "swapper-0"; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	if ($statline !~ /$regex_statname/o) { | ||
|  | 		die("Failed to math stat line for process name :: $statline"); | ||
|  | 	} | ||
|  | 	return "$1-$pid"; | ||
|  | } | ||
|  | 
 | ||
|  | sub parent_info($$) { | ||
|  | 	my $pid = $_[0]; | ||
|  | 	my $statline = $_[1]; | ||
|  | 	my $ppid; | ||
|  | 
 | ||
|  | 	if ($pid == 0) { | ||
|  | 		return "NOPARENT-0"; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	if ($statline !~ /$regex_statppid/o) { | ||
|  | 		die("Failed to match stat line process ppid:: $statline"); | ||
|  | 	} | ||
|  | 
 | ||
|  | 	# Read the ppid stat line | ||
|  | 	$ppid = $1; | ||
|  | 	return guess_process_pid($ppid, read_statline($ppid)); | ||
|  | } | ||
|  | 
 | ||
|  | sub process_events { | ||
|  | 	my $traceevent; | ||
|  | 	my $process_pid; | ||
|  | 	my $cpus; | ||
|  | 	my $timestamp; | ||
|  | 	my $tracepoint; | ||
|  | 	my $details; | ||
|  | 	my $statline; | ||
|  | 
 | ||
|  | 	# Read each line of the event log | ||
|  | EVENT_PROCESS: | ||
|  | 	while ($traceevent = <STDIN>) { | ||
|  | 		if ($traceevent =~ /$regex_traceevent/o) { | ||
|  | 			$process_pid = $1; | ||
|  | 			$tracepoint = $4; | ||
|  | 
 | ||
|  | 			if ($opt_read_procstat || $opt_prepend_parent) { | ||
|  | 				$process_pid =~ /(.*)-([0-9]*)$/; | ||
|  | 				my $process = $1; | ||
|  | 				my $pid = $2; | ||
|  | 
 | ||
|  | 				$statline = read_statline($pid); | ||
|  | 
 | ||
|  | 				if ($opt_read_procstat && $process eq '') { | ||
|  | 					$process_pid = guess_process_pid($pid, $statline); | ||
|  | 				} | ||
|  | 
 | ||
|  | 				if ($opt_prepend_parent) { | ||
|  | 					$process_pid = parent_info($pid, $statline) . " :: $process_pid"; | ||
|  | 				} | ||
|  | 			} | ||
|  | 
 | ||
|  | 			# Unnecessary in this script. Uncomment if required | ||
|  | 			# $cpus = $2; | ||
|  | 			# $timestamp = $3; | ||
|  | 		} else { | ||
|  | 			next; | ||
|  | 		} | ||
|  | 
 | ||
|  | 		# Perl Switch() sucks majorly | ||
|  | 		if ($tracepoint eq "mm_page_alloc") { | ||
|  | 			$perprocesspid{$process_pid}->{MM_PAGE_ALLOC}++; | ||
|  | 		} elsif ($tracepoint eq "mm_page_free_direct") { | ||
|  | 			$perprocesspid{$process_pid}->{MM_PAGE_FREE_DIRECT}++; | ||
|  | 		} elsif ($tracepoint eq "mm_pagevec_free") { | ||
|  | 			$perprocesspid{$process_pid}->{MM_PAGEVEC_FREE}++; | ||
|  | 		} elsif ($tracepoint eq "mm_page_pcpu_drain") { | ||
|  | 			$perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN}++; | ||
|  | 			$perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED}++; | ||
|  | 		} elsif ($tracepoint eq "mm_page_alloc_zone_locked") { | ||
|  | 			$perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}++; | ||
|  | 			$perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED}++; | ||
|  | 		} elsif ($tracepoint eq "mm_page_alloc_extfrag") { | ||
|  | 
 | ||
|  | 			# Extract the details of the event now | ||
|  | 			$details = $5; | ||
|  | 
 | ||
|  | 			my ($page, $pfn); | ||
|  | 			my ($alloc_order, $fallback_order, $pageblock_order); | ||
|  | 			my ($alloc_migratetype, $fallback_migratetype); | ||
|  | 			my ($fragmenting, $change_ownership); | ||
|  | 
 | ||
|  | 			if ($details !~ /$regex_fragdetails/o) { | ||
|  | 				print "WARNING: Failed to parse mm_page_alloc_extfrag as expected\n"; | ||
|  | 				next; | ||
|  | 			} | ||
|  | 
 | ||
|  | 			$perprocesspid{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG}++; | ||
|  | 			$page = $1; | ||
|  | 			$pfn = $2; | ||
|  | 			$alloc_order = $3; | ||
|  | 			$fallback_order = $4; | ||
|  | 			$pageblock_order = $5; | ||
|  | 			$alloc_migratetype = $6; | ||
|  | 			$fallback_migratetype = $7; | ||
|  | 			$fragmenting = $8; | ||
|  | 			$change_ownership = $9; | ||
|  | 
 | ||
|  | 			if ($fragmenting) { | ||
|  | 				$perprocesspid{$process_pid}->{HIGH_EXT_FRAG}++; | ||
|  | 				if ($fallback_order <= 3) { | ||
|  | 					$perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE}++; | ||
|  | 				} else { | ||
|  | 					$perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE}++; | ||
|  | 				} | ||
|  | 			} | ||
|  | 			if ($change_ownership) { | ||
|  | 				$perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED}++; | ||
|  | 			} | ||
|  | 		} else { | ||
|  | 			$perprocesspid{$process_pid}->{EVENT_UNKNOWN}++; | ||
|  | 		} | ||
|  | 
 | ||
|  | 		# Catch a full pcpu drain event | ||
|  | 		if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} && | ||
|  | 				$tracepoint ne "mm_page_pcpu_drain") { | ||
|  | 
 | ||
|  | 			$perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS}++; | ||
|  | 			$perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} = 0; | ||
|  | 		} | ||
|  | 
 | ||
|  | 		# Catch a full pcpu refill event | ||
|  | 		if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} && | ||
|  | 				$tracepoint ne "mm_page_alloc_zone_locked") { | ||
|  | 			$perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS}++; | ||
|  | 			$perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} = 0; | ||
|  | 		} | ||
|  | 
 | ||
|  | 		if ($sigint_pending) { | ||
|  | 			last EVENT_PROCESS; | ||
|  | 		} | ||
|  | 	} | ||
|  | } | ||
|  | 
 | ||
|  | sub dump_stats { | ||
|  | 	my $hashref = shift; | ||
|  | 	my %stats = %$hashref; | ||
|  | 
 | ||
|  | 	# Dump per-process stats | ||
|  | 	my $process_pid; | ||
|  | 	my $max_strlen = 0; | ||
|  | 
 | ||
|  | 	# Get the maximum process name | ||
|  | 	foreach $process_pid (keys %perprocesspid) { | ||
|  | 		my $len = length($process_pid); | ||
|  | 		if ($len > $max_strlen) { | ||
|  | 			$max_strlen = $len; | ||
|  | 		} | ||
|  | 	} | ||
|  | 	$max_strlen += 2; | ||
|  | 
 | ||
|  | 	printf("\n"); | ||
|  | 	printf("%-" . $max_strlen . "s %8s %10s   %8s %8s   %8s %8s %8s   %8s %8s %8s %8s %8s %8s\n", | ||
|  | 		"Process", "Pages",  "Pages",      "Pages", "Pages", "PCPU",  "PCPU",   "PCPU",    "Fragment",  "Fragment", "MigType", "Fragment", "Fragment", "Unknown"); | ||
|  | 	printf("%-" . $max_strlen . "s %8s %10s   %8s %8s   %8s %8s %8s   %8s %8s %8s %8s %8s %8s\n", | ||
|  | 		"details", "allocd", "allocd",     "freed", "freed", "pages", "drains", "refills", "Fallback", "Causing",   "Changed", "Severe", "Moderate", ""); | ||
|  | 
 | ||
|  | 	printf("%-" . $max_strlen . "s %8s %10s   %8s %8s   %8s %8s %8s   %8s %8s %8s %8s %8s %8s\n", | ||
|  | 		"",        "",       "under lock", "direct", "pagevec", "drain", "", "", "", "", "", "", "", ""); | ||
|  | 
 | ||
|  | 	foreach $process_pid (keys %stats) { | ||
|  | 		# Dump final aggregates | ||
|  | 		if ($stats{$process_pid}->{STATE_PCPU_PAGES_DRAINED}) { | ||
|  | 			$stats{$process_pid}->{HIGH_PCPU_DRAINS}++; | ||
|  | 			$stats{$process_pid}->{STATE_PCPU_PAGES_DRAINED} = 0; | ||
|  | 		} | ||
|  | 		if ($stats{$process_pid}->{STATE_PCPU_PAGES_REFILLED}) { | ||
|  | 			$stats{$process_pid}->{HIGH_PCPU_REFILLS}++; | ||
|  | 			$stats{$process_pid}->{STATE_PCPU_PAGES_REFILLED} = 0; | ||
|  | 		} | ||
|  | 
 | ||
|  | 		printf("%-" . $max_strlen . "s %8d %10d   %8d %8d   %8d %8d %8d   %8d %8d %8d %8d %8d %8d\n", | ||
|  | 			$process_pid, | ||
|  | 			$stats{$process_pid}->{MM_PAGE_ALLOC}, | ||
|  | 			$stats{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}, | ||
|  | 			$stats{$process_pid}->{MM_PAGE_FREE_DIRECT}, | ||
|  | 			$stats{$process_pid}->{MM_PAGEVEC_FREE}, | ||
|  | 			$stats{$process_pid}->{MM_PAGE_PCPU_DRAIN}, | ||
|  | 			$stats{$process_pid}->{HIGH_PCPU_DRAINS}, | ||
|  | 			$stats{$process_pid}->{HIGH_PCPU_REFILLS}, | ||
|  | 			$stats{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG}, | ||
|  | 			$stats{$process_pid}->{HIGH_EXT_FRAG}, | ||
|  | 			$stats{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED}, | ||
|  | 			$stats{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE}, | ||
|  | 			$stats{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE}, | ||
|  | 			$stats{$process_pid}->{EVENT_UNKNOWN}); | ||
|  | 	} | ||
|  | } | ||
|  | 
 | ||
|  | sub aggregate_perprocesspid() { | ||
|  | 	my $process_pid; | ||
|  | 	my $process; | ||
|  | 	undef %perprocess; | ||
|  | 
 | ||
|  | 	foreach $process_pid (keys %perprocesspid) { | ||
|  | 		$process = $process_pid; | ||
|  | 		$process =~ s/-([0-9])*$//; | ||
|  | 		if ($process eq '') { | ||
|  | 			$process = "NO_PROCESS_NAME"; | ||
|  | 		} | ||
|  | 
 | ||
|  | 		$perprocess{$process}->{MM_PAGE_ALLOC} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC}; | ||
|  | 		$perprocess{$process}->{MM_PAGE_ALLOC_ZONE_LOCKED} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}; | ||
|  | 		$perprocess{$process}->{MM_PAGE_FREE_DIRECT} += $perprocesspid{$process_pid}->{MM_PAGE_FREE_DIRECT}; | ||
|  | 		$perprocess{$process}->{MM_PAGEVEC_FREE} += $perprocesspid{$process_pid}->{MM_PAGEVEC_FREE}; | ||
|  | 		$perprocess{$process}->{MM_PAGE_PCPU_DRAIN} += $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN}; | ||
|  | 		$perprocess{$process}->{HIGH_PCPU_DRAINS} += $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS}; | ||
|  | 		$perprocess{$process}->{HIGH_PCPU_REFILLS} += $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS}; | ||
|  | 		$perprocess{$process}->{MM_PAGE_ALLOC_EXTFRAG} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG}; | ||
|  | 		$perprocess{$process}->{HIGH_EXT_FRAG} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAG}; | ||
|  | 		$perprocess{$process}->{HIGH_EXT_FRAGMENT_CHANGED} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED}; | ||
|  | 		$perprocess{$process}->{HIGH_EXT_FRAGMENT_SEVERE} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE}; | ||
|  | 		$perprocess{$process}->{HIGH_EXT_FRAGMENT_MODERATE} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE}; | ||
|  | 		$perprocess{$process}->{EVENT_UNKNOWN} += $perprocesspid{$process_pid}->{EVENT_UNKNOWN}; | ||
|  | 	} | ||
|  | } | ||
|  | 
 | ||
|  | sub report() { | ||
|  | 	if (!$opt_ignorepid) { | ||
|  | 		dump_stats(\%perprocesspid); | ||
|  | 	} else { | ||
|  | 		aggregate_perprocesspid(); | ||
|  | 		dump_stats(\%perprocess); | ||
|  | 	} | ||
|  | } | ||
|  | 
 | ||
|  | # Process events or signals until neither is available | ||
|  | sub signal_loop() { | ||
|  | 	my $sigint_processed; | ||
|  | 	do { | ||
|  | 		$sigint_processed = 0; | ||
|  | 		process_events(); | ||
|  | 
 | ||
|  | 		# Handle pending signals if any | ||
|  | 		if ($sigint_pending) { | ||
|  | 			my $current_time = time; | ||
|  | 
 | ||
|  | 			if ($sigint_exit) { | ||
|  | 				print "Received exit signal\n"; | ||
|  | 				$sigint_pending = 0; | ||
|  | 			} | ||
|  | 			if ($sigint_report) { | ||
|  | 				if ($current_time >= $sigint_received + 2) { | ||
|  | 					report(); | ||
|  | 					$sigint_report = 0; | ||
|  | 					$sigint_pending = 0; | ||
|  | 					$sigint_processed = 1; | ||
|  | 				} | ||
|  | 			} | ||
|  | 		} | ||
|  | 	} while ($sigint_pending || $sigint_processed); | ||
|  | } | ||
|  | 
 | ||
|  | signal_loop(); | ||
|  | report(); |