commit 257ceed13116940ad75cdb15a016aabab18d357f
parent 74fe1ed063bbf5f5ab782dc054cc0c7104765ab9
Author: Ivan Gankevich <i.gankevich@spbu.ru>
Date: Wed, 31 Mar 2021 17:26:41 +0300
Add daily statistics to the paper.
Diffstat:
7 files changed, 195 insertions(+), 27 deletions(-)
diff --git a/Makefile b/Makefile
@@ -15,6 +15,7 @@ all: build/iccsa-21-wind-slides.pdf
build/main.pdf: main.tex
build/main.pdf: build/inkscape/anemometer.eps
+build/main.pdf: build/daily-stats.eps
#build/main.pdf: build/gnuplot/anemometer.eps
#build/main.pdf: build/gnuplot/anemometer.svg
build/main.pdf:
@@ -32,7 +33,7 @@ build/iccsa-21-wind-slides.pdf: build/slides.pdf
cp $< $@
build/%.eps: build/%.svg
- inkscape -z --export-text-to-path --export-eps=$@ $<
+ inkscape -z --export-eps=$@ $<
build/gnuplot/%.svg: gnuplot/%.gnuplot
@mkdir -p build/gnuplot
@@ -40,7 +41,10 @@ build/gnuplot/%.svg: gnuplot/%.gnuplot
build/inkscape/%.eps: inkscape/%.svg
@mkdir -p build/inkscape
- inkscape -z --export-text-to-path --export-eps=$@ $<
+ inkscape -z --export-eps=$@ $<
+
+build/daily-stats.svg: gnuplot/daily-stats.gnuplot build/daily-stats
+ gnuplot -d $<
#build/main.zip: build/gnuplot/*.eps main.tex
build/main.zip: main.tex
diff --git a/R/daily-stats.R b/R/daily-stats.R
@@ -0,0 +1,52 @@
+library(DBI)
+
+source("R/calibrate.R")
+
+sample_to_speed <- function(x, coef, coef_reverse) {
+ x <- x - mean(x) # convert from unsigned to signed sensor values
+ x <- sign(x)*sqrt(abs(x)) # convert from force to velocity
+ # scale sensor values to wind speed using calibration coefficients
+ x[x<0] = x[x<0] / -coef
+ x[x>0] = x[x>0] / coef_reverse
+ # remove linear trend
+ t <- c(1:length(x))
+ reg <- lm(x~t)
+ x <- x - reg$fitted.values
+ x
+}
+
+db <- dbConnect(RSQLite::SQLite(), "samples/load-cell.sqlite3")
+RSQLite::initExtension(db)
+
+velocity = dbGetQuery(db,
+ "SELECT timestamp/(60*60*2) AS interval,
+ --STRFTIME('%Y-%m-%dT%H', DATETIME(timestamp, 'unixepoch')) AS timestamp,
+ timestamp,
+ t, x, y, z
+ FROM samples
+ ORDER BY timestamp")
+
+i <- 0
+result <- data.frame(interval=numeric(0),timestamp=numeric(0),
+ x_sd=numeric(0),y_sd=numeric(0),z_sd=numeric(0),
+ speed=numeric(0))
+unique_intervals <- unique(velocity$interval)
+pb <- txtProgressBar(min = 0, max = length(unique_intervals), style = 3)
+for (interval in unique_intervals) {
+ v <- (velocity[velocity$interval==interval, ])
+ v$x <- sample_to_speed(v$x, coefficient_x, coefficient_x_reverse)
+ v$y <- sample_to_speed(v$y, coefficient_y, coefficient_y_reverse)
+ v$z <- sample_to_speed(v$z, coefficient_z, coefficient_z_reverse)
+ new_row = nrow(result)+1
+ result[new_row, "interval"] <- interval
+ result[new_row, "timestamp"] <- min(v$timestamp)
+ result[new_row, "x_sd"] <- sd(v$x)
+ result[new_row, "y_sd"] <- sd(v$y)
+ result[new_row, "z_sd"] <- sd(v$z)
+ result[new_row, "speed"] <- sqrt(var(v$x) + var(v$y) + var(v$z))
+ i <- i + 1
+ setTxtProgressBar(pb, i)
+}
+write.table(result, "build/daily-stats", row.names=FALSE, quote=FALSE)
+
+dbDisconnect(db)
diff --git a/gnuplot/daily-stats.gnuplot b/gnuplot/daily-stats.gnuplot
@@ -1,13 +1,31 @@
-set terminal svg size 1920,1680 dynamic font 'Liberation Sans, 20'
-
-set datafile separator '|'
+load 'gnuplot/paired.pal'
+set terminal svg size 500,400 font 'Free Serif, 10' enhanced dynamic
set xtics nomirror out rotate 90
set ytics nomirror out
set border 1+2
+set key top center outside maxrows 1
+
+stats 'build/daily-stats' using 6
+
+# time axis
+set xdata time
+set timefmt "%s"
+set format x "%Y-%m-%d"
+set xtics 0,60*60*24
+set mxtics 1
+localtime(t) = t+3*60*60
+
set output 'build/daily-stats.svg'
-plot 'build/daily-stats' using 2:xtic(1) with linespoints title 'Max X',\
- 'build/daily-stats' using 3:xtic(1) with linespoints title 'Max Y',\
- 'build/daily-stats' using 4:xtic(1) with linespoints title 'Max Z',\
- 'build/daily-stats' using 5:xtic(1) with linespoints title 'Avg. X',\
- 'build/daily-stats' using 6:xtic(1) with linespoints title 'Avg. Y',\
- 'build/daily-stats' using 7:xtic(1) with linespoints title 'Avg. Z'
+
+do for [timestamp in "1614546000 1614978000 1615496400 1616187600 1616274000 1616360400 1616533200"] {
+ t = int(timestamp)
+ set obj t rectangle behind from first localtime(t), first 0 to first localtime(t)+24*60*60, graph 1 back fillstyle solid 1.0 fillcolor '#f0f0c0' lw 0
+}
+
+plot \
+ 'build/daily-stats' using (localtime($2)):3 with lines lc '#c04040' title 'υ_x',\
+ 'build/daily-stats' using (localtime($2)):4 with lines lc '#40c040' title 'υ_y',\
+ 'build/daily-stats' using (localtime($2)):5 with lines lc '#4040c0' title 'υ_z',\
+ 'build/daily-stats' using (localtime($2)):6 with lines lc '#404040' title 'υ',\
+ STATS_mean with lines lc '#808080' dashtype 2 notitle
+
diff --git a/gnuplot/paired.pal b/gnuplot/paired.pal
@@ -0,0 +1,28 @@
+# vim:filetype=gnuplot
+# line styles for ColorBrewer Paired
+# for use with qualitative/categorical data
+# provides 8 colors in 4 pairs
+# compatible with gnuplot >=4.2
+# author: Anna Schneider
+
+# line styles
+set style line 1 lt 1 lc rgb '#A6CEE3' # light blue
+set style line 2 lt 1 lc rgb '#1F78B4' # dark blue
+set style line 3 lt 1 lc rgb '#B2DF8A' # light green
+set style line 4 lt 1 lc rgb '#33A02C' # dark green
+set style line 5 lt 1 lc rgb '#FB9A99' # light red
+set style line 6 lt 1 lc rgb '#E31A1C' # dark red
+set style line 7 lt 1 lc rgb '#FDBF6F' # light orange
+set style line 8 lt 1 lc rgb '#FF7F00' # dark orange
+
+# palette
+set palette maxcolors 8
+set palette defined ( \
+ 0 '#A6CEE3',\
+ 1 '#1F78B4',\
+ 2 '#B2DF8A',\
+ 3 '#33A02C',\
+ 4 '#FB9A99',\
+ 5 '#E31A1C',\
+ 6 '#FDBF6F',\
+ 7 '#FF7F00' )
diff --git a/main.bib b/main.bib
@@ -56,3 +56,12 @@
publisher = {Holden-Day}
}
+@Manual{r-language,
+ title = {R: A Language and Environment for Statistical Computing},
+ author = {{R Core Team}},
+ organization = {R Foundation for Statistical Computing},
+ address = {Vienna, Austria},
+ year = {2020},
+ url = {https://www.R-project.org/},
+}
+
diff --git a/main.tex b/main.tex
@@ -5,6 +5,7 @@
\usepackage{booktabs}
\usepackage{cite}
\usepackage{graphicx}
+\usepackage{listings}
\usepackage{url}
\DeclareMathOperator{\Mode}{Mo}
@@ -18,6 +19,8 @@
Ivan Gankevich\textsuperscript{*}\orcidID{0000-0001-7067-6928}
}
+\lstset{language=R,literate={<-}{{$\gets$}}1}
+
\titlerunning{TODO}
\authorrunning{A.\,Gavrikov, I.\,Gankevich}
@@ -188,7 +191,7 @@ Parameter \(c_{t,x,y,z}\) controls the shape of the autocorrelation function in
the corresponding direction; it does not have simple relationship to the wind
velocity statistical parameters.
-\subsection{Data collection}
+\subsection{Data collection and preprocessing}
We installed anemometer on the tripod and placed it on the balcony. Then we
connected load cells to the microcontroller via HX711 load cell amplifiers and
@@ -196,9 +199,56 @@ programmed the microcontroller to record the output of each sensor every second
and print it on the standard output. Then we connected the microcontroller to
the computer via USB interface and wrote a script to collect the data coming
from the USB and store it in the SQLite database. We decided to store raw
-sensor values in the range of 0 to 65535 to be able to calibrate anemometer
+sensor values in the range from 0 to 65535 to be able to calibrate anemometer
later.
+We calibrated three-axis anemometer using commercial anemometer HP-866A and a
+fan that rotates with constant speed. First, we measured the wind speed that
+the fan generates when attached to commercial anemometer. Then we successively
+placed the fan behind and in front of each arm of our anemometer and measured
+values that the corresponding load cell reported for each side of the arm with
+and without the fan. Then we were able to calculate two coefficients for each
+axis: one for negative and another one for positive wind velocities. The
+coefficient equals the sensor value that is equivalent to the wind speed of 1
+m/s (table~\ref{tab-coefficients}).
+
+\begin{table}
+ \centering
+ \caption{Calibration coefficients for each arm of three-axis anemometer:
+ \(C_1\) is for negative values and \(C_2\) is for positive values.
+ \label{tab-coefficients}}
+ \begin{tabular}{lll}
+ \toprule
+ Axis & \(C_1\) & \(C_2\) \\
+ \midrule
+ X & 11.19 & 12.31 \\
+ Y & 11.46 & 11.25 \\
+ Z & 13.55 & 13.90 \\
+ \bottomrule
+ \end{tabular}
+\end{table}
+
+We noticed that ambient temperature affects values reported by our load cells:
+when the load cell heats up (cools down), it reports values that increase
+(decrease) linearly in time. We removed this linear trend from the measured
+values using linear regression. The code in R~\cite{r-language} that transforms
+sensor values into wind speed is presented in
+listing~\ref{lst-sample-to-speed}.
+
+\begin{lstlisting}[label={lst-sample-to-speed},caption={The code that transforms raw load cell sensor values into wind speed projection to the corresponding axis.}]
+sampleToSpeed <- function(x, c1, c2) {
+ x <- x - mean(x) # convert from unsigned to signed sensor values
+ x <- sign(x)*sqrt(abs(x)) # convert from force to velocity
+ # scale sensor values to wind speed using calibration coefficients
+ x[x<0] = x[x<0] / c1
+ x[x>0] = x[x>0] / c2
+ # remove linear trend
+ t <- c(1:length(x))
+ reg <- lm(x~t)
+ x - reg$fitted.values
+}
+\end{lstlisting}
+
Over a period of one month we collected 2.7M samples and filtered out 14\% of
them as having too large unnatural values. We attributed these values to
measurement errors as they spread uniformly across all the time span and are
@@ -206,19 +256,32 @@ surrounded by the values of regular magnitude. TODO picture of a large error.
From the remaning data we choose days with wind speeds above the average as
reported by EMERCOM of Russia\footnote{https://en.mchs.gov.ru/} for Saint
-Petersburg. TODO picture of average and max speeds grouped by day. After that
-we divided each day into two-hour intervals over which we collected the
-statistics individually. From these intervals we choose the ones with data
-distributions close to unimodal, so we could easily fit them into the model.
+Petersburg. After that we divided each day into two-hour intervals over which
+we collected the statistics individually. From these intervals we choose the
+ones with data distributions close to unimodal, so we could easily fit them
+into the model. The statistics for each interval is presented in
+figure~\ref{fig-intervals}.
+
+\begin{figure}
+ \centering
+ \includegraphics{build/daily-stats.eps}
+ \caption{The statistics for each interval of the collected data. Here
+ \(\upsilon_x\), \(\upsilon_y\) and \(\upsilon_z\) are mean speeds for
+ each interval for the corresponding axes, \(\upsilon\) is mean scalar wind speed
+ for each interval. The horizontal line shows overall average speed.
+ Yellow rectangles denote days when EMERCOM of Russia reported wind
+ speeds above average.
+ \label{fig-intervals}}
+\end{figure}
\begin{table}
\centering
\begin{tabular}{ll}
\toprule
- Time span & 31 days \\
- Size & 106 Mb \\
- No. of samples & 2\,736\,114 \\
- No. of samples after filtering & 2\,362\,523 \\
+ Time span & 36 days \\
+ Size & 122 Mb \\
+ No. of samples & 3\,157\,234 \\
+ No. of samples after filtering & 2\,775\,387 \\
Resolution & 1 sample per second \\
\bottomrule
\end{tabular}
diff --git a/sh/properties b/sh/properties
@@ -24,9 +24,3 @@ echo "Y mean = $y_mean"
echo "z mean = $z_mean"
echo -n "Sample count after filtering: "
sqlite3 $db "SELECT COUNT(*) FROM samples WHERE ABS(x-$x_mean)<10000 AND ABS(y-$y_mean)<10000 AND ABS(z-$z_mean)<10000"
-sqlite3 $db "
-SELECT strftime('%Y-%m-%d', datetime(timestamp,'unixepoch')) AS d, MAX(x),MAX(y),MAX(z), AVG(x),AVG(y),AVG(z)
-FROM samples
-WHERE ABS(x-$x_mean)<10000 AND ABS(y-$y_mean)<10000 AND ABS(z-$z_mean)<10000
-GROUP BY d
-" > build/daily-stats