项目作者: daranzolin

项目描述 :
The California Department of Education in R
高级语言: R
项目地址: git://github.com/daranzolin/rCAEDDATA.git
创建时间: 2017-10-05T05:41:57Z
项目社区:https://github.com/daranzolin/rCAEDDATA

开源协议:Other

下载


The California Department of Education in R

The California Department of Education publishes a large amount of public data. The rCAEDDATA package makes that data available as ready-to-use datasets in R.

Installation

  1. devtools::install_github("daranzolin/rCAEDDATA")
  2. library(rCAEDDATA)

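Available Datasets

The examples below use the `graduates`, `dropouts`, `enrollments`, and `suspensions` datasets; the package may include others not shown here.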

Examples

Graduates

  1. library(rCAEDDATA)
  2. library(tidyverse)
  3. #> Loading tidyverse: ggplot2
  4. #> Loading tidyverse: tibble
  5. #> Loading tidyverse: tidyr
  6. #> Loading tidyverse: readr
  7. #> Loading tidyverse: purrr
  8. #> Loading tidyverse: dplyr
  9. #> Conflicts with tidy packages ----------------------------------------------
  10. #> filter(): dplyr, stats
  11. #> lag(): dplyr, stats
  12. data("graduates")
  13. graduates %>%
  14. group_by(YEAR) %>%
  15. summarize(total_grads = sum(GRADS),
  16. Yes = sum(UC_GRADS),
  17. No = total_grads - Yes) %>%
  18. select(-total_grads) %>%
  19. gather(Eligibility, Graduates, -YEAR) %>%
  20. ggplot(aes(YEAR, Graduates, fill = Eligibility)) +
  21. geom_bar(stat = "identity", color = "black") +
  22. labs(x = "Year",
  23. y = "Graduates",
  24. title = "California High School Graduates, 1992-2016",
  25. fill = "UC Eligible?") +
  26. scale_y_continuous(labels = scales::comma) +
  27. scale_fill_manual(values = c("yellow", "lightblue")) +
  28. theme_minimal()

Dropouts

  1. data("dropouts")
  2. dropouts %>%
  3. select(GENDER, matches("D[0-9]{1,2}")) %>%
  4. gather(GRADE, DROPOUTS, -GENDER) %>%
  5. mutate(GRADE = as.numeric(stringr::str_replace(GRADE, "D", ""))) %>%
  6. group_by(GENDER, GRADE) %>%
  7. summarize(DROPOUTS = sum(DROPOUTS)) %>%
  8. ggplot(aes(GRADE, DROPOUTS, fill = GENDER)) +
  9. geom_bar(stat = "identity", position = "fill") +
  10. scale_x_continuous(breaks = c(7:12)) +
  11. labs(x = "Grade",
  12. y = "",
  13. title = "Proportion of Student Dropouts by Gender, Grades 7-12",
  14. fill = "Gender") +
  15. theme_minimal()

Enrollments

  1. enrollments %>%
  2. mutate(ETHNIC = case_when(
  3. ETHNIC == 0 ~ "Not Reported",
  4. ETHNIC == 1 ~ "American Indian",
  5. ETHNIC == 2 ~ "Asian",
  6. ETHNIC == 3 ~ "Pacific Islander",
  7. ETHNIC == 4 ~ "Filipino",
  8. ETHNIC == 5 ~ "Hispanic",
  9. ETHNIC == 6 ~ "African American",
  10. ETHNIC == 7 ~ "White",
  11. ETHNIC == 9 | ETHNIC == 8 ~ "Two or More")
  12. ) %>%
  13. filter(DISTRICT %in% c("Santa Clara Unified",
  14. "Milpitas Unified",
  15. "San Jose Unified",
  16. "Fremont Union High",
  17. "Mountain View-Los Altos Union High",
  18. "Cupertino Union",
  19. "Campbell Union",
  20. "Cambrian",
  21. "Palo Alto Unified")
  22. ) %>%
  23. select(DISTRICT, YEAR, ETHNIC, starts_with("GR_")) %>%
  24. gather(GRADE, STUDENTS, -DISTRICT, -YEAR, -ETHNIC) %>%
  25. group_by(DISTRICT, YEAR, ETHNIC) %>%
  26. summarize(TOTAL_STUDENTS = sum(STUDENTS)) %>%
  27. ggplot(aes(YEAR, TOTAL_STUDENTS, fill = ETHNIC)) +
  28. geom_bar(stat = "identity", position = "fill") +
  29. facet_wrap(~DISTRICT, nrow = 3) +
  30. labs(x = "Year",
  31. y = "",
  32. title = "Ethnic Diversity in Silicon Valley, 2007-2017",
  33. subtitle = "Santa Clara Districts",
  34. fill = "Ethnicity") +
  35. theme_minimal() +
  36. theme(axis.text.x = element_text(angle = 45, hjust = 1))

Suspensions

  1. library(maps)
  2. #>
  3. #> Attaching package: 'maps'
  4. #> The following object is masked from 'package:purrr':
  5. #>
  6. #> map
  7. library(ggmap)
  8. library(mapdata)
  9. states <- map_data("state")
  10. ca_df <- subset(states, region == "california")
  11. counties <- map_data("county")
  12. ca_county <- subset(counties, region == "california")
  13. drug_data <- suspensions %>%
  14. filter(YEAR == "2014-15",
  15. AGGEGATELEVEL == "O") %>%
  16. group_by(NAME) %>%
  17. summarize(TOTAL_DRUGS = sum(DRUGS, na.rm = TRUE),
  18. TOTAL = sum(TOTAL, na.rm = TRUE),
  19. DRUG_PROP = round(TOTAL_DRUGS/TOTAL, 2))
  20. map_data <- left_join(ca_county, drug_data %>%
  21. mutate(subregion = stringr::str_to_lower(NAME)),
  22. by = "subregion")
  23. ggplot(data = ca_df, mapping = aes(x = long, y = lat, group = group)) +
  24. coord_fixed(1.3) +
  25. geom_polygon(color = "black", fill = "gray") +
  26. geom_polygon(data = map_data, aes(fill = DRUG_PROP), color = "white") +
  27. geom_polygon(color = "black", fill = NA) +
  28. labs(title = "Proportion of Drugs-Related Suspensions by County, 2014-2015",
  29. fill = "Proportion") +
  30. theme_void() +
  31. viridis::scale_fill_viridis()