Managers {Lahman} | R Documentation |
Managers table: information about individual team managers, the teams they managed, and some basic statistics for those teams in each year.
data(Managers)
A data frame with 3306 observations on the following 10 variables.
managerID
Manager ID code (e.g., 'larusto01m')
yearID
Year
teamID
Team; a factor
lgID
League; a factor with levels AA, AL, FL, NL, PL, UA
inseason
Managerial order. Zero if the individual managed the team the entire year. Otherwise denotes where the manager appeared in the managerial order (1 for first manager, 2 for second, etc.)
G
Games managed
W
Wins
L
Losses
rank
Team's final position in standings that year
plyrMgr
Player Manager (denoted by 'Y'); a factor with levels N, Y
Lahman, S. (2010) Lahman's Baseball Database, 1871-2012, 2012 version, http://baseball1.com/statistics/
####################################
# Basic career summaries by manager
####################################
library('plyr')
#' Career summary for one manager
#'
#' Collapses the season-level rows of a single manager into one row of
#' career totals.
#'
#' @param d A data frame with columns `yearID`, `teamID`, `rank`, `W` and `L`
#'   (one row per managerial stint).
#' @return A one-row data frame with columns `nyear`, `yearBegin`, `yearEnd`,
#'   `nTeams`, `nfirst`, `W`, `L` and `WinPct`.
mgrsumm <- function(d) {
  # data.frame() must be the expression evaluated by with(); passing the
  # named summaries directly to with() leaves its `expr` argument missing.
  with(d, data.frame(
    nyear = length(unique(yearID)),
    yearBegin = min(yearID),
    yearEnd = max(yearID),
    nTeams = length(unique(teamID)),
    nfirst = sum(rank == 1L),
    W = sum(W),
    L = sum(L),
    # Inside this call W and L still refer to the per-row columns of `d`,
    # so the career percentage has to be built from explicit sums.
    WinPct = round(sum(W) / sum(W + L), 3)))
}
# One row of career totals per manager, computed group-wise by managerID.
mgrTotals <- ddply(
  .data = Managers,
  .variables = 'managerID',
  .fun = summarise,
  # Span and breadth of the career
  nyear = length(unique(yearID)),
  yearBegin = min(yearID),
  yearEnd = max(yearID),
  nTeams = length(unique(teamID)),
  # Seasons finished in first place
  nfirst = sum(rank == 1L),
  # Decisions (wins plus losses), then the totals themselves
  games = sum(W + L),
  W = sum(W),
  L = sum(L),
  WinPct = round(sum(W)/sum(W + L), 3)
)
# Attach last/first names from Master, using only the Master rows that
# actually carry a managerID.
mgrTotals <- merge(
  x = mgrTotals,
  y = Master[!is.na(Master$managerID),
             c('managerID', 'nameLast', 'nameFirst')],
  by = 'managerID'
)
##########################
# Some basic queries
##########################
# Top 20 managers in terms of years of service:
head(arrange(mgrTotals, desc(nyear)), n = 20)
## managerID nyear yearBegin yearEnd nTeams nfirst games W L WinPct
## 1 mackco01m 53 1894 1950 2 9 7679 3731 3948 0.486
## 2 larusto01m 33 1979 2011 3 12 5093 2728 2365 0.536
## 3 mcgrajo01m 33 1899 1932 3 11 4711 2763 1948 0.586
## 4 coxbo01m 29 1978 2010 2 15 4505 2504 2001 0.556
## 5 harribu01m 29 1924 1956 5 3 4377 2158 2219 0.493
## 6 torrejo01m 29 1977 2010 5 13 4323 2326 1997 0.538
## 7 andersp01m 26 1970 1995 2 7 4028 2194 1834 0.545
## 8 mauchge01m 26 1960 1987 4 2 3939 1902 2037 0.483
## 9 mckecbi01m 25 1915 1946 5 4 3619 1896 1723 0.524
## 10 stengca01m 25 1934 1965 4 10 3747 1905 1842 0.508
## 11 durocle01m 24 1939 1973 4 3 3717 2008 1709 0.540
## 12 mccarjo99m 24 1926 1950 3 9 3458 2125 1333 0.615
## 13 alstowa01m 23 1954 1976 2 7 3653 2040 1613 0.558
## 14 pinielo01m 23 1986 2010 5 6 3548 1835 1713 0.517
## 15 wrighha01m 23 1871 1893 4 6 2110 1225 885 0.581
## 16 ansonca01m 21 1875 1898 3 5 2243 1296 947 0.578
## 17 dykesji01m 21 1934 1961 6 0 2947 1406 1541 0.477
## 18 lasorto01m 21 1976 1996 1 8 3038 1599 1439 0.526
## 19 leylaji99m 21 1986 2012 4 5 3335 1676 1659 0.503
## 20 willidi02m 21 1967 1988 6 6 3022 1571 1451 0.520
## nameLast nameFirst
## 1 Mack Connie
## 2 LaRussa Tony
## 3 McGraw John
## 4 Cox Bobby
## 5 Harris Bucky
## 6 Torre Joe
## 7 Anderson Sparky
## 8 Mauch Gene
## 9 McKechnie Bill
## 10 Stengel Casey
## 11 Durocher Leo
## 12 McCarthy Joe
## 13 Alston Walter
## 14 Piniella Lou
## 15 Wright Harry
## 16 Anson Cap
## 17 Dykes Jimmie
## 18 Lasorda Tommy
## 19 Leyland Jim
## 20 Williams Dick
# Top 20 winningest managers (500 games minimum)
head(arrange(mgrTotals[which(mgrTotals$games >= 500), ], desc(WinPct)),
     n = 20)
## managerID nyear yearBegin yearEnd nTeams nfirst games W L WinPct
## 1 mccarjo99m 24 1926 1950 3 9 3458 2125 1333 0.615
## 2 mutriji99m 9 1883 1891 2 3 1077 658 419 0.611
## 3 comisch01m 12 1883 1894 3 4 1381 840 541 0.608
## 4 seleefr99m 16 1890 1905 2 5 2146 1284 862 0.598
## 5 southbi01m 13 1929 1951 2 4 1748 1044 704 0.597
## 6 chancfr01m 11 1905 1923 3 4 1594 946 648 0.593
## 7 mcgrajo01m 33 1899 1932 3 11 4711 2763 1948 0.586
## 8 lopezal01m 17 1951 1969 2 2 2414 1410 1004 0.584
## 9 weaveea99m 17 1968 1986 1 6 2540 1480 1060 0.583
## 10 cochrmi01m 5 1934 1938 1 2 598 348 250 0.582
## 11 wrighha01m 23 1871 1893 4 6 2110 1225 885 0.581
## 12 ansonca01m 21 1875 1898 3 5 2243 1296 947 0.578
## 13 dyered01m 5 1946 1950 1 1 771 446 325 0.578
## 14 rowlapa99m 4 1915 1918 1 1 586 339 247 0.578
## 15 clarkfr01m 19 1897 1915 2 4 2783 1602 1181 0.576
## 16 girarjo01m 6 2006 2012 2 3 972 557 415 0.573
## 17 mcgunbi01m 5 1888 1896 4 2 575 327 248 0.569
## 18 johnsda02m 16 1984 2012 5 6 2281 1286 995 0.564
## 19 wardjo01m 7 1880 1894 4 0 732 412 320 0.563
## 20 moranpa01m 9 1915 1923 2 2 1334 748 586 0.561
## nameLast nameFirst
## 1 McCarthy Joe
## 2 Mutrie Jim
## 3 Comiskey Charlie
## 4 Selee Frank
## 5 Southworth Billy
## 6 Chance Frank
## 7 McGraw John
## 8 Lopez Al
## 9 Weaver Earl
## 10 Cochrane Mickey
## 11 Wright Harry
## 12 Anson Cap
## 13 Dyer Eddie
## 14 Rowland Pants
## 15 Clarke Fred
## 16 Girardi Joe
## 17 McGunnigle Bill
## 18 Johnson Davey
## 19 Ward John
## 20 Moran Pat
# Hmm. Many of these are 19th century managers.
# How about the modern era?
head(arrange(subset(mgrTotals, games >= 500 & yearBegin >= 1900),
             desc(WinPct)),
     n = 20)
## managerID nyear yearBegin yearEnd nTeams nfirst games W L WinPct
## 1 mccarjo99m 24 1926 1950 3 9 3458 2125 1333 0.615
## 2 southbi01m 13 1929 1951 2 4 1748 1044 704 0.597
## 3 chancfr01m 11 1905 1923 3 4 1594 946 648 0.593
## 4 lopezal01m 17 1951 1969 2 2 2414 1410 1004 0.584
## 5 weaveea99m 17 1968 1986 1 6 2540 1480 1060 0.583
## 6 cochrmi01m 5 1934 1938 1 2 598 348 250 0.582
## 7 dyered01m 5 1946 1950 1 1 771 446 325 0.578
## 8 rowlapa99m 4 1915 1918 1 1 586 339 247 0.578
## 9 girarjo01m 6 2006 2012 2 3 972 557 415 0.573
## 10 johnsda02m 16 1984 2012 5 6 2281 1286 995 0.564
## 11 moranpa01m 9 1915 1923 2 2 1334 748 586 0.561
## 12 oneilst01m 14 1935 1954 4 1 1861 1040 821 0.559
## 13 alstowa01m 23 1954 1976 2 7 3653 2040 1613 0.558
## 14 coxbo01m 29 1978 2010 2 15 4505 2504 2001 0.556
## 15 dierkla01m 5 1997 2001 1 5 783 435 348 0.556
## 16 huggimi01m 17 1913 1929 2 6 2547 1413 1134 0.555
## 17 manuech01m 11 2000 2012 2 6 1706 947 759 0.555
## 18 terrybi01m 10 1932 1941 1 3 1484 823 661 0.555
## 19 martibi02m 16 1969 1988 5 6 2266 1253 1013 0.553
## 20 littlgr99m 4 2002 2007 2 0 648 358 290 0.552
## nameLast nameFirst
## 1 McCarthy Joe
## 2 Southworth Billy
## 3 Chance Frank
## 4 Lopez Al
## 5 Weaver Earl
## 6 Cochrane Mickey
## 7 Dyer Eddie
## 8 Rowland Pants
## 9 Girardi Joe
## 10 Johnson Davey
## 11 Moran Pat
## 12 O'Neill Steve
## 13 Alston Walter
## 14 Cox Bobby
## 15 Dierker Larry
## 16 Huggins Miller
## 17 Manuel Charlie
## 18 Terry Bill
## 19 Martin Billy
## 20 Little Grady
# Top 10 managers by proportion of seasons finishing first (league or
# divisional titles). This should favor managers from the divisional era
# (1969 onward), since more first-place finishes are available each season.
head(arrange(subset(mgrTotals, games >= 500 & yearBegin >= 1900),
             desc(round(nfirst / nyear, 3))),
     n = 10)
## managerID nyear yearBegin yearEnd nTeams nfirst games W L WinPct
## 1 dierkla01m 5 1997 2001 1 5 783 435 348 0.556
## 2 howsedi01m 8 1978 1986 2 5 932 507 425 0.544
## 3 freyji99m 5 1980 1986 2 3 610 323 287 0.530
## 4 gardero01m 11 2002 2012 1 6 1783 932 851 0.523
## 5 manuech01m 11 2000 2012 2 6 1706 947 759 0.555
## 6 coxbo01m 29 1978 2010 2 15 4505 2504 2001 0.556
## 7 brenlbo01m 4 2001 2004 1 2 565 303 262 0.536
## 8 girarjo01m 6 2006 2012 2 3 972 557 415 0.573
## 9 kenneke99m 4 1993 1996 2 2 582 309 273 0.531
## 10 morgajo01m 4 1988 1991 1 2 563 301 262 0.535
## nameLast nameFirst
## 1 Dierker Larry
## 2 Howser Dick
## 3 Frey Jim
## 4 Gardenhire Ron
## 5 Manuel Charlie
## 6 Cox Bobby
## 7 Brenly Bob
## 8 Girardi Joe
## 9 Kennedy Kevin
## 10 Morgan Joe
# How about pre-1969, i.e. careers that ended before divisional play began?
# The bound is strict: 1969 was itself a divisional season, so a career
# ending in 1969 is not purely pre-divisional.
head(arrange(subset(mgrTotals,
                    yearBegin >= 1900 & yearEnd < 1969 & games >= 500),
             -round(nfirst/nyear, 3)), 10)
## managerID nyear yearBegin yearEnd nTeams nfirst games W L WinPct
## 1 cochrmi01m 5 1934 1938 1 2 598 348 250 0.582
## 2 stengca01m 25 1934 1965 4 10 3747 1905 1842 0.508
## 3 mccarjo99m 24 1926 1950 3 9 3458 2125 1333 0.615
## 4 chancfr01m 11 1905 1923 3 4 1594 946 648 0.593
## 5 huggimi01m 17 1913 1929 2 6 2547 1413 1134 0.555
## 6 colliji01m 6 1901 1906 1 2 831 455 376 0.548
## 7 streega01m 6 1929 1938 2 2 697 365 332 0.524
## 8 southbi01m 13 1929 1951 2 4 1748 1044 704 0.597
## 9 terrybi01m 10 1932 1941 1 3 1484 823 661 0.555
## 10 carribi02m 7 1913 1929 1 2 989 489 500 0.494
## nameLast nameFirst
## 1 Cochrane Mickey
## 2 Stengel Casey
## 3 McCarthy Joe
## 4 Chance Frank
## 5 Huggins Miller
## 6 Collins Jimmy
## 7 Street Gabby
## 8 Southworth Billy
## 9 Terry Bill
## 10 Carrigan Bill
##############################################
# Density plot of the number of games managed:
##############################################
library('ggplot2')
ggplot(data = mgrTotals, mapping = aes(x = games)) +
  geom_density(alpha = 0.3, fill = 'red') +
  xlab('Number of games managed')
# Who managed at least 4000 games?
mgrTotals[which(mgrTotals$games >= 4000), ]
## managerID nyear yearBegin yearEnd nTeams nfirst games W L
## 12 andersp01m 26 1970 1995 2 7 4028 2194 1834
## 112 coxbo01m 29 1978 2010 2 15 4505 2504 2001
## 242 harribu01m 29 1924 1956 5 3 4377 2158 2219
## 334 larusto01m 33 1979 2011 3 12 5093 2728 2365
## 362 mackco01m 53 1894 1950 2 9 7679 3731 3948
## 398 mcgrajo01m 33 1899 1932 3 11 4711 2763 1948
## 611 torrejo01m 29 1977 2010 5 13 4323 2326 1997
## WinPct nameLast nameFirst
## 12 0.545 Anderson Sparky
## 112 0.556 Cox Bobby
## 242 0.493 Harris Bucky
## 334 0.536 LaRussa Tony
## 362 0.486 Mack Connie
## 398 0.586 McGraw John
## 611 0.538 Torre Joe
# Connie Mack had an advantage: he owned the Philadelphia A's :)
# Table of Tony LaRussa's team finishes:
table(rank = subset(Managers, managerID == 'larusto01m')$rank)
## rank
## 1 2 3 4 5 7
## 12 4 8 5 4 1
# To include zero frequencies, one alternative is the tabulate() function:
tabulate(subset(Managers, managerID == 'larusto01m')$rank, nbins = 7)
## [1] 12 4 8 5 4 0 1
##############################################
# Scatterplot of winning percentage vs. number of games managed (min 100)
##############################################
ggplot(data = subset(mgrTotals, games >= 100 & yearBegin >= 1900),
       mapping = aes(x = games, y = WinPct)) +
  geom_point() +
  geom_smooth() +
  xlab('Number of games managed')
## geom_smooth: method="auto" and size of largest group is <1000, so using
## loess. Use 'method = x' to change the smoothing method.
############################################
# Division titles
############################################
# Plot of number of first place finishes by managers with at least 8 years
# of experience in the divisional era (>= 1969):
divMgr <- mgrTotals[which(mgrTotals$nyear >= 8 & mgrTotals$yearBegin >= 1969), ]
# Response is the number of titles
# Jittered scatterplot of first-place finishes against years managed, with a
# smoother overlaid. The jitter argument is spelled out in full (`width`)
# rather than relying on partial matching of `w`.
ggplot(divMgr, aes(x = nyear, y = nfirst)) +
  geom_point(position = position_jitter(width = 0.2)) +
  labs(x = 'Number of years', y = 'Number of divisional titles') +
  geom_smooth()
## geom_smooth: method="auto" and size of largest group is <1000, so using
## loess. Use 'method = x' to change the smoothing method.
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
# Response is the proportion of titles
# Same layout as a titles-by-years scatterplot, but the response is the share
# of seasons finished in first place. The jitter argument is spelled out in
# full (`width`) rather than relying on partial matching of `w`.
ggplot(divMgr, aes(x = nyear, y = round(nfirst/nyear, 3))) +
  geom_point(position = position_jitter(width = 0.2)) +
  labs(x = 'Number of years', y = 'Proportion of divisional titles') +
  geom_smooth()
## geom_smooth: method="auto" and size of largest group is <1000, so using
## loess. Use 'method = x' to change the smoothing method.
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).