SQL Intermediate

By Salerno | October 6, 2022

1) Introduction

This a set of exercises that around the SQL Intermediate course that I’ve studied at DataCamp (which I really recommend!).

In fact, the databases used there was different that I’ve used here. In other to accomplish the goals of practice as you study, I studied a lot of new concepts of SQL language and I’ve tried to use in a different environment (RStudio) and a new dataset.

I hope you enjoy a lot as I do!


# 1) Important packages ----
library(DBI)
library(dplyr)
library(odbc)

# 2) Defining a PostgreSQL DS ----
ds_postgresql <- odbcListDataSources()[[1]][4]

# 3) Checking all drivers ----
drv_all <- sort(unique(odbcListDrivers()[[1]]))

# 4) Defining a PostgreSQL drive ----
drv_postgresql <- drv_all[9]

# 5) Connection ----
con <- dbConnect(odbc::odbc(), 
                 dsn = ds_postgresql,
                 server = "localhost",
                 uid = "postgres",
                 database= "rstudio_test")

# 6) Read back the full table
dtab = dbGetQuery(con, 'select * from gapminder')
summary(dtab)
##    country           continent              year         lifeexp     
##  Length:1704        Length:1704        Min.   :1952   Min.   :23.60  
##  Class :character   Class :character   1st Qu.:1966   1st Qu.:48.20  
##  Mode  :character   Mode  :character   Median :1980   Median :60.71  
##                                        Mean   :1980   Mean   :59.47  
##                                        3rd Qu.:1993   3rd Qu.:70.85  
##                                        Max.   :2007   Max.   :82.60  
##       pop              gdppercap       
##  Min.   :6.001e+04   Min.   :   241.2  
##  1st Qu.:2.794e+06   1st Qu.:  1202.1  
##  Median :7.024e+06   Median :  3531.8  
##  Mean   :2.960e+07   Mean   :  7215.3  
##  3rd Qu.:1.959e+07   3rd Qu.:  9325.5  
##  Max.   :1.319e+09   Max.   :113523.1

# 7) filter
dtab = dbGetQuery(con, 'select country from gapminder')
summary(dtab)
##    country         
##  Length:1704       
##  Class :character  
##  Mode  :character
head(dtab)
##       country
## 1 Afghanistan
## 2 Afghanistan
## 3 Afghanistan
## 4 Afghanistan
## 5 Afghanistan
## 6 Afghanistan

# 8) WHERE clause with number
dtab = dbGetQuery(con, 'select country, lifeexp
                  from gapminder
                  where lifeexp > 75')
summary(dtab)
##    country             lifeexp     
##  Length:173         Min.   :75.01  
##  Class :character   1st Qu.:76.15  
##  Mode  :character   Median :77.42  
##                     Mean   :77.68  
##                     3rd Qu.:78.82  
##                     Max.   :82.60
head(dtab)
##     country lifeexp
## 1   Albania  75.651
## 2   Albania  76.423
## 3 Argentina  75.320
## 4 Australia  76.320
## 5 Australia  77.560
## 6 Australia  78.830

# 9) WHERE clause with text

dtab = dbGetQuery(con, "select country, lifeexp
                  from gapminder
                  where country = 'Argentina'")
summary(dtab)
##    country             lifeexp     
##  Length:12          Min.   :62.48  
##  Class :character   1st Qu.:65.51  
##  Mode  :character   Median :69.21  
##                     Mean   :69.06  
##                     3rd Qu.:72.22  
##                     Max.   :75.32
head(dtab)
##     country lifeexp
## 1 Argentina  62.485
## 2 Argentina  64.399
## 3 Argentina  65.142
## 4 Argentina  65.634
## 5 Argentina  67.065
## 6 Argentina  68.481

# 10) WHERE clause with AND

dtab = dbGetQuery(con, "select *
                        from gapminder
                        where country = 'Argentina' and lifeexp > 65 and pop > 25000000")

summary(dtab)
##    country           continent              year         lifeexp     
##  Length:7           Length:7           Min.   :1977   Min.   :68.48  
##  Class :character   Class :character   1st Qu.:1984   1st Qu.:70.36  
##  Mode  :character   Mode  :character   Median :1992   Median :71.87  
##                                        Mean   :1992   Mean   :72.00  
##                                        3rd Qu.:2000   3rd Qu.:73.81  
##                                        Max.   :2007   Max.   :75.32  
##       pop             gdppercap    
##  Min.   :26983828   Min.   : 8798  
##  1st Qu.:30481146   1st Qu.: 9069  
##  Median :33958947   Median : 9308  
##  Mean   :33820225   Mean   :10010  
##  3rd Qu.:37267292   3rd Qu.:10523  
##  Max.   :40301927   Max.   :12779
head(dtab)
##     country continent year lifeexp      pop gdppercap
## 1 Argentina  Americas 1977  68.481 26983828 10079.027
## 2 Argentina  Americas 1982  69.942 29341374  8997.897
## 3 Argentina  Americas 1987  70.774 31620918  9139.671
## 4 Argentina  Americas 1992  71.868 33958947  9308.419
## 5 Argentina  Americas 1997  73.275 36203463 10967.282
## 6 Argentina  Americas 2002  74.340 38331121  8797.641

# 11) WHERE clause with OR

dtab = dbGetQuery(con, "select country, continent, year
                  from gapminder
                  where year = 1977 or year = 1982")
summary(dtab)
##    country           continent              year     
##  Length:284         Length:284         Min.   :1977  
##  Class :character   Class :character   1st Qu.:1977  
##  Mode  :character   Mode  :character   Median :1980  
##                                        Mean   :1980  
##                                        3rd Qu.:1982  
##                                        Max.   :1982
head(dtab)
##       country continent year
## 1 Afghanistan      Asia 1977
## 2 Afghanistan      Asia 1982
## 3     Albania    Europe 1977
## 4     Albania    Europe 1982
## 5     Algeria    Africa 1977
## 6     Algeria    Africa 1982
# 12) WHERE clause with and using two attributes
dtab = dbGetQuery(con,
                  "select country, continent, pop, lifeexp
                  from gapminder
                  where (pop > 1000000 and pop < 1500000)
                  and (lifeexp > 65 and lifeexp < 71)")
summary(dtab)
##    country           continent              pop             lifeexp     
##  Length:12          Length:12          Min.   :1039009   Min.   :65.57  
##  Class :character   Class :character   1st Qu.:1086304   1st Qu.:68.81  
##  Mode  :character   Mode  :character   Median :1127290   Median :69.40  
##                                        Mean   :1145466   Mean   :69.08  
##                                        3rd Qu.:1158281   3rd Qu.:69.76  
##                                        Max.   :1489518   Max.   :70.74
head(dtab)
##               country continent     pop lifeexp
## 1              Kuwait      Asia 1140357  69.343
## 2           Mauritius    Africa 1042663  68.740
## 3           Mauritius    Africa 1096202  69.745
## 4           Mauritius    Africa 1149818  70.736
## 5            Slovenia    Europe 1489518  65.570
## 6 Trinidad and Tobago  Americas 1039009  68.300

# 13) WHERE clause and BETWEEN
dtab = dbGetQuery(con,
                  'select country, continent, year, pop
                  from gapminder
                  where year between 1990 and 1995')

summary(dtab)
##    country           continent              year           pop           
##  Length:142         Length:142         Min.   :1992   Min.   :1.259e+05  
##  Class :character   Class :character   1st Qu.:1992   1st Qu.:3.606e+06  
##  Mode  :character   Mode  :character   Median :1992   Median :8.689e+06  
##                                        Mean   :1992   Mean   :3.599e+07  
##                                        3rd Qu.:1992   3rd Qu.:2.271e+07  
##                                        Max.   :1992   Max.   :1.165e+09
head(dtab)
##       country continent year      pop
## 1 Afghanistan      Asia 1992 16317921
## 2     Albania    Europe 1992  3326498
## 3     Algeria    Africa 1992 26298373
## 4      Angola    Africa 1992  8735988
## 5   Argentina  Americas 1992 33958947
## 6   Australia   Oceania 1992 17481977

# 14) WHERE clause with LIKE
dtab = dbGetQuery(con,
                  "select *
                  from gapminder
                  where country like 'A%'" )
summary(dtab)
##    country           continent              year         lifeexp     
##  Length:84          Length:84          Min.   :1952   Min.   :28.80  
##  Class :character   Class :character   1st Qu.:1966   1st Qu.:42.04  
##  Mode  :character   Mode  :character   Median :1980   Median :67.27  
##                                        Mean   :1980   Mean   :59.95  
##                                        3rd Qu.:1993   3rd Qu.:72.04  
##                                        Max.   :2007   Max.   :81.23  
##       pop             gdppercap      
##  Min.   : 1282697   Min.   :  635.3  
##  1st Qu.: 7101494   1st Qu.: 2724.7  
##  Median :10830537   Median : 5380.7  
##  Mean   :13774802   Mean   : 8777.0  
##  3rd Qu.:19562728   3rd Qu.:10954.1  
##  Max.   :40301927   Max.   :36126.5
head(dtab)
##       country continent year lifeexp      pop gdppercap
## 1 Afghanistan      Asia 1952  28.801  8425333  779.4453
## 2 Afghanistan      Asia 1957  30.332  9240934  820.8530
## 3 Afghanistan      Asia 1962  31.997 10267083  853.1007
## 4 Afghanistan      Asia 1967  34.020 11537966  836.1971
## 5 Afghanistan      Asia 1972  36.088 13079460  739.9811
## 6 Afghanistan      Asia 1977  38.438 14880372  786.1134

# 15) WHERE clause with LIKE
dtab = dbGetQuery(con,
                  "select country, pop
                  from gapminder
                  where country like 'A%'")
summary(dtab)
##    country               pop          
##  Length:84          Min.   : 1282697  
##  Class :character   1st Qu.: 7101494  
##  Mode  :character   Median :10830537  
##                     Mean   :13774802  
##                     3rd Qu.:19562728  
##                     Max.   :40301927
head(dtab)
##       country      pop
## 1 Afghanistan  8425333
## 2 Afghanistan  9240934
## 3 Afghanistan 10267083
## 4 Afghanistan 11537966
## 5 Afghanistan 13079460
## 6 Afghanistan 14880372

# 16) WHERE clause with LIKE
dtab = dbGetQuery(con,
                  "select country, pop
                  from gapminder
                  where country like '%n'")
summary(dtab)
##    country               pop           
##  Length:192         Min.   :   120447  
##  Class :character   1st Qu.:  1559128  
##  Mode  :character   Median :  8288454  
##                     Mean   : 23030281  
##                     3rd Qu.: 28308158  
##                     Max.   :169270617
head(dtab)
##       country      pop
## 1 Afghanistan  8425333
## 2 Afghanistan  9240934
## 3 Afghanistan 10267083
## 4 Afghanistan 11537966
## 5 Afghanistan 13079460
## 6 Afghanistan 14880372

# 17) WHERE clause with NOT LIKE
dtab = dbGetQuery(con,
                  "select country, pop
                  from gapminder
                  where country not like 'A%'")
summary(dtab)
##    country               pop           
##  Length:1620        Min.   :6.001e+04  
##  Class :character   1st Qu.:2.682e+06  
##  Mode  :character   Median :6.671e+06  
##                     Mean   :3.042e+07  
##                     3rd Qu.:1.962e+07  
##                     Max.   :1.319e+09
head(dtab)
##   country    pop
## 1 Bahrain 120447
## 2 Bahrain 138655
## 3 Bahrain 171863
## 4 Bahrain 202182
## 5 Bahrain 230800
## 6 Bahrain 297410

# 18) WHERE clause with IN
dtab = dbGetQuery(con,
                  "select country, continent
                  from gapminder
                  where continent in ('Africa', 'Europe')")

summary(dtab)
##    country           continent        
##  Length:984         Length:984        
##  Class :character   Class :character  
##  Mode  :character   Mode  :character
head(dtab)
##   country continent
## 1 Albania    Europe
## 2 Albania    Europe
## 3 Albania    Europe
## 4 Albania    Europe
## 5 Albania    Europe
## 6 Albania    Europe
# 19) Using arithmetic clauses
# numerical data only
# aggregate functions execute in a column, otherwise arithmetic functions calcultes through the rows (events)
dtab = dbGetQuery(con,
                  'SELECT AVG(pop)
                  FROM gapminder')

dtab
##        avg
## 1 29601212

# 20) numerical data only
dtab = dbGetQuery(con,
                  'SELECT SUM(pop)
                  FROM gapminder')

dtab
##           sum
## 1 50440465801
# 21) various data types
dtab = dbGetQuery(con,
                  'SELECT MIN(pop)
                  FROM gapminder')

dtab
##     min
## 1 60011

# 22) various data types
dtab = dbGetQuery(con,
                  'SELECT MIN(country)
                  FROM gapminder')

dtab
##           min
## 1 Afghanistan

# 23) various data types
dtab = dbGetQuery(con,
                  'SELECT MAX(country)
                  FROM gapminder')

dtab
##        max
## 1 Zimbabwe

# 24) using a negative parameter in ROUND you will obtain a round number two decimals to the left
dtab = dbGetQuery(con,
                  "select ROUND(AVG(pop), -2)
                  from gapminder
                  where year = 1957")

dtab
##      round
## 1 18763400

# 25) GROUP BY and ORDER BY
dtab = dbGetQuery(con,
                  "select year, continent, country, min(pop)
                  from gapminder
                  group by year, continent, country
                  order by year, min desc")
head(dtab)
##   year continent       country       min
## 1 1952      Asia         China 556263527
## 2 1952      Asia         India 372000000
## 3 1952  Americas United States 157553000
## 4 1952      Asia         Japan  86459025
## 5 1952      Asia     Indonesia  82052000
## 6 1952    Europe       Germany  69145952

# 26) GROUP BY, HAVING and ORDER BY
dtab = dbGetQuery(con,
                  "select country, avg(lifeexp) as life_exp_avg
                  from gapminder
                  group by country
                  having avg(lifeexp) > 70
                  order by life_exp_avg desc")

dtab
##             country life_exp_avg
## 1           Iceland     76.51142
## 2            Sweden     76.17700
## 3            Norway     75.84300
## 4       Netherlands     75.64850
## 5       Switzerland     75.56508
## 6            Canada     74.90275
## 7             Japan     74.82692
## 8         Australia     74.66292
## 9           Denmark     74.37017
## 10           France     74.34892
## 11            Spain     74.20342
## 12            Italy     74.01383
## 13      New Zealand     73.98950
## 14   United Kingdom     73.92258
## 15           Greece     73.73317
## 16           Israel     73.64583
## 17          Belgium     73.64175
## 18 Hong Kong, China     73.49283
## 19    United States     73.47850
## 20          Germany     73.44442
## 21          Austria     73.10325
## 22          Ireland     73.01725
## 23          Finland     72.99192
## 24      Puerto Rico     72.73933
## 25         Slovenia     71.60075
## 26   Czech Republic     71.51050
## 27        Singapore     71.22025
## 28             Cuba     71.04508
## 29          Uruguay     70.78158
## 30  Slovak Republic     70.69608
## 31         Portugal     70.41983
## 32           Taiwan     70.33667
## 33       Montenegro     70.29917
## 34       Costa Rica     70.18142
## 35           Poland     70.17692
## 36          Croatia     70.05592
comments powered by Disqus