D3: Heatmaps

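To create a heatmap, recall that we require three data attributes: two categorical attributes (keys), which determine the column and the row of each cell, and one quantitative attribute (value), which determines how the cell is drawn (e.g. its color).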

Often, however, we are not given data in this form. When our attributes take on discrete values, we typically have multiple data items that share the same attribute values. Hence, it becomes necessary to group these items and aggregate them (e.g. with d3.nest).

In the following example, we are provided a dataset of barley statistics, and we will plot the average yield for each pair of variety and site. There are two basic ways (among others) to implement this in D3. In a nested data join, we may create one group (a column) per value of the first attribute, and then forgo setting the x position in the subsequent data join on rectangles. Alternatively, we can forgo the grouping, and instead directly set the x and y coordinates of the rectangles.

Here is the first option using groups:

// Load the barley dataset, store it in the shared `barley_data` variable that
// plot_it_0 reads, and then draw the grouped heatmap.
d3.json('barley.json')
	.then(function(data)  {
		barley_data = data;
		plot_it_0();
	})
	.catch(function(error)  {
		// Report a failed load (or an error thrown while plotting) instead of
		// leaving the promise rejection unhandled.
		console.error('barley heatmap (plot_it_0) failed:', error);
	});

/**
 * Heatmap of mean barley yield, built with a nested data join: one <g> per
 * top-level nest entry (variety), translated to its column, and one <rect>
 * per second-level entry (site) inside it.
 * Reads the shared `barley_data` array and draws into the '#svg0-0' element.
 */
function plot_it_0()  {
	const svg0 = d3.select('#svg0-0');
	const x_range_pad = 100;
	const y_range_pad = 100;
	const height = svg0.attr('height');

	// Running extent of the per-cell means; updated as a side effect of the rollup.
	let min_agg = 1e14;
	let max_agg = -1e14;
	const mean_yield_tracked = function(rows)  {
		const mean_value = d3.mean(rows, row => row.yield);
		max_agg = Math.max(max_agg, mean_value);
		min_agg = Math.min(min_agg, mean_value);
		return mean_value;
	};

	// variety -> site -> mean yield
	const nested_data = d3.nest()
		.key(row => row.variety)
		.key(row => row.site)
		.rollup(mean_yield_tracked)
		.entries(barley_data);

	const unique_varieties = d3.set(barley_data, row => row.variety).values();
	const unique_sites = d3.set(barley_data, row => row.site).values();

	const x_scale = d3.scaleBand()
		.domain(unique_varieties)
		.range([x_range_pad, height])
		.paddingInner(0.05);
	const y_scale = d3.scaleBand()
		.domain(unique_sites)
		.range([0, height - y_range_pad])
		.paddingInner(0.05);

	// Color ramp: luminance rises and chroma falls as the mean grows.
	const agg_domain = [min_agg, max_agg];
	const lum_scale = d3.scaleLinear().domain(agg_domain).range([10, 90]);
	const chroma_scale = d3.scaleLinear().domain(agg_domain).range([110, 10]);

	// Outer join: one group per variety, shifted horizontally to its band.
	const columns = svg0.selectAll('cols').data(nested_data).enter().append('g');
	columns.attr('transform', col => `translate(${x_scale(col.key)},0)`);

	// Inner join: one rectangle per site; x comes from the parent group's transform.
	const cells = columns.selectAll('cols').data(col => col.values).enter().append('rect');
	cells
		.attr('y', cell => y_scale(cell.key))
		.attr('width', x_scale.bandwidth())
		.attr('height', y_scale.bandwidth())
		.attr('fill', cell => d3.hcl(10, chroma_scale(cell.value), lum_scale(cell.value)));

	create_axes_example0(svg0, x_range_pad, height - y_range_pad, x_scale, y_scale, true);
}



And without groups:

// Load the barley dataset, store it in the shared `barley_data` variable that
// plot_it_1 reads, and then draw the ungrouped heatmap.
d3.json('barley.json')
	.then(function(data)  {
		barley_data = data;
		plot_it_1();
	})
	.catch(function(error)  {
		// Report a failed load (or an error thrown while plotting) instead of
		// leaving the promise rejection unhandled.
		console.error('barley heatmap (plot_it_1) failed:', error);
	});

/**
 * Heatmap of mean barley yield without intermediate groups: a single data
 * join creates one <rect> per (variety, site) pair and positions it directly
 * with both band scales.
 * Reads the shared `barley_data` array and draws into the '#svg0-1' element.
 */
function plot_it_1()  {
	const svg0 = d3.select('#svg0-1');
	const x_range_pad = 100;
	const y_range_pad = 100;
	const height = svg0.attr('height');

	// Running extent of the per-cell means; updated as a side effect of the rollup.
	let min_agg = 1e14;
	let max_agg = -1e14;

	// One flat entry per variety/site pair. The rollup keeps the variety and
	// site next to the mean because the combined key is not split again later.
	const summarize_cell = function(rows)  {
		const mean_value = d3.mean(rows, row => row.yield);
		max_agg = Math.max(max_agg, mean_value);
		min_agg = Math.min(min_agg, mean_value);
		const first_row = rows[0];
		return {aggregation: mean_value, variety: first_row.variety, site: first_row.site};
	};
	const nested_data = d3.nest()
		.key(row => row.variety + '-' + row.site)
		.rollup(summarize_cell)
		.entries(barley_data);

	const unique_varieties = d3.set(barley_data, row => row.variety).values();
	const unique_sites = d3.set(barley_data, row => row.site).values();

	const x_scale = d3.scaleBand()
		.domain(unique_varieties)
		.range([x_range_pad, height])
		.paddingInner(0.05);
	const y_scale = d3.scaleBand()
		.domain(unique_sites)
		.range([0, height - y_range_pad])
		.paddingInner(0.05);

	// Color ramp: luminance rises and chroma falls as the mean grows.
	const agg_domain = [min_agg, max_agg];
	const lum_scale = d3.scaleLinear().domain(agg_domain).range([10, 90]);
	const chroma_scale = d3.scaleLinear().domain(agg_domain).range([110, 10]);

	const cells = svg0.selectAll('rows').data(nested_data).enter().append('rect');
	cells
		.attr('x', cell => x_scale(cell.value.variety))
		.attr('y', cell => y_scale(cell.value.site))
		.attr('width', x_scale.bandwidth())
		.attr('height', y_scale.bandwidth())
		.attr('fill', cell => {
			const mean_value = cell.value.aggregation;
			return d3.hcl(10, chroma_scale(mean_value), lum_scale(mean_value));
		});

	create_axes_example0(svg0, x_range_pad, height - y_range_pad, x_scale, y_scale, true);
}



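Note that it was necessary to have a way to access the attribute values when setting the coordinates, unlike in the first example, where the key at each nesting level of nest gives us the attribute value directly. Here again is an example of “working backwards”: we first decide what each mark needs (a variety, a site, and an aggregate per rectangle), and then shape the value returned by rollup to provide it.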

Not just color

It is quite straightforward to use a different visual channel within each cell. So, let’s use size!

// Load the barley dataset, store it in the shared `barley_data` variable that
// plot_it_2 reads, and then draw the size-encoded grid.
d3.json('barley.json')
	.then(function(data)  {
		barley_data = data;
		plot_it_2();
	})
	.catch(function(error)  {
		// Report a failed load (or an error thrown while plotting) instead of
		// leaving the promise rejection unhandled.
		console.error('barley size plot (plot_it_2) failed:', error);
	});

/**
 * Grid of circles whose radius encodes mean barley yield per (variety, site)
 * pair, with thin guide lines appended to the axis ticks.
 * Reads the shared `barley_data` array and draws into the '#svg0-2' element.
 */
function plot_it_2()  {
	const svg0 = d3.select('#svg0-2');
	const x_range_pad = 100;
	const y_range_pad = 100;
	const height = svg0.attr('height');

	// Running extent of the per-cell means; updated as a side effect of the rollup.
	let min_agg = 1e14;
	let max_agg = -1e14;

	// One flat entry per variety/site pair, carrying the attributes needed for positioning.
	const summarize_cell = function(rows)  {
		const mean_value = d3.mean(rows, row => row.yield);
		max_agg = Math.max(max_agg, mean_value);
		min_agg = Math.min(min_agg, mean_value);
		const first_row = rows[0];
		return {aggregation: mean_value, variety: first_row.variety, site: first_row.site};
	};
	const nested_data = d3.nest()
		.key(row => row.variety + '-' + row.site)
		.rollup(summarize_cell)
		.entries(barley_data);

	const unique_varieties = d3.set(barley_data, row => row.variety).values();
	const unique_sites = d3.set(barley_data, row => row.site).values();

	const x_scale = d3.scaleBand()
		.domain(unique_varieties)
		.range([x_range_pad, height])
		.paddingInner(0.05);
	const y_scale = d3.scaleBand()
		.domain(unique_sites)
		.range([0, height - y_range_pad])
		.paddingInner(0.05);

	// The largest radius is half of a column's width.
	const half_band_x = .5 * x_scale.bandwidth();
	const half_band_y = .5 * y_scale.bandwidth();
	const size_scale = d3.scaleSqrt().domain([min_agg, max_agg]).range([1, half_band_x]);

	// Circles and guide lines share one color.
	const mark_color = d3.hcl(10, 50, 60);

	// Each circle sits at the center of its band in both directions.
	const circles = svg0.selectAll('rows').data(nested_data).enter().append('circle');
	circles
		.attr('cx', cell => half_band_x + x_scale(cell.value.variety))
		.attr('cy', cell => half_band_y + y_scale(cell.value.site))
		.attr('r', cell => size_scale(cell.value.aggregation))
		.attr('fill', () => mark_color);

	create_axes_example0(svg0, x_range_pad, height - y_range_pad, x_scale, y_scale, true);

	// Guide lines hung off the ticks; the '.leftaxis' / '.bottomaxis' elements
	// are presumably created by create_axes_example0 above (not visible here).
	const horizontal_extent = height - x_range_pad - x_scale.bandwidth() / 2;
	const vertical_extent = -(height - y_range_pad - y_scale.bandwidth() / 2);

	svg0.select('.leftaxis').selectAll('.tick').append('line')
		.attr('x2', horizontal_extent)
		.attr('stroke-width', 0.5)
		.attr('stroke', mark_color);
	svg0.select('.bottomaxis').selectAll('.tick').append('line')
		.attr('y2', vertical_extent)
		.attr('stroke-width', 0.5)
		.attr('stroke', mark_color);
}



Deeper Maps

We can even go one level deeper. The data has another attribute, year, which takes one of two values. By adding an additional key to our nest and performing three data joins, we can plot a pair of bars in each cell.

// Load the barley dataset, store it in the shared `barley_data` variable that
// plot_it_3 reads, and then draw the three-level heatmap.
d3.json('barley.json')
	.then(function(data)  {
		barley_data = data;
		plot_it_3();
	})
	.catch(function(error)  {
		// Report a failed load (or an error thrown while plotting) instead of
		// leaving the promise rejection unhandled.
		console.error('barley per-year heatmap (plot_it_3) failed:', error);
	});

/**
 * Heatmap with a third nesting level: each (variety, site) cell holds one
 * colored <rect> per year, placed side by side by an inner band scale.
 * Varieties and sites are ordered by ascending overall mean yield.
 * Reads the shared `barley_data` array and draws into the '#svg0-3' element.
 */
function plot_it_3()  {
	const svg0 = d3.select('#svg0-3');
	const x_range_pad = 100;
	const y_range_pad = 100;
	const height = svg0.attr('height');

	const mean_yield = rows => d3.mean(rows, row => row.yield);

	// Running extent of the per-leaf means; updated as a side effect of the rollup.
	let min_agg = 1e14;
	let max_agg = -1e14;

	// variety -> site -> year -> mean yield
	const nested_data = d3.nest()
		.key(row => row.variety)
		.key(row => row.site)
		.key(row => row.year)
		.rollup(rows => {
			const mean_value = mean_yield(rows);
			max_agg = Math.max(max_agg, mean_value);
			min_agg = Math.min(min_agg, mean_value);
			return mean_value;
		})
		.entries(barley_data);

	// Distinct values of one attribute, ordered by ascending mean yield of their rows.
	const keys_by_mean_yield = function(key_of)  {
		const groups = d3.nest().key(key_of).rollup(mean_yield).entries(barley_data);
		groups.sort((a, b) => a.value - b.value);
		return groups.map(group => group.key);
	};
	const unique_varieties = keys_by_mean_yield(row => row.variety);
	const unique_sites = keys_by_mean_yield(row => row.site);
	const unique_years = d3.set(barley_data, row => row.year).values();

	const x_scale = d3.scaleBand()
		.domain(unique_varieties)
		.range([x_range_pad, 2 * height])
		.paddingInner(0.15)
		.paddingOuter(0.05);
	const y_scale = d3.scaleBand()
		.domain(unique_sites)
		.range([0, height - y_range_pad])
		.paddingInner(0.15)
		.paddingOuter(0.05);
	// Inner scale that subdivides a single cell's width among the years.
	const x_1_scale = d3.scaleBand()
		.domain(unique_years)
		.range([0, x_scale.bandwidth()])
		.paddingInner(0.05);

	// Color ramp: luminance rises and chroma falls as the mean grows.
	const agg_domain = [min_agg, max_agg];
	const lum_scale = d3.scaleLinear().domain(agg_domain).range([10, 90]);
	const chroma_scale = d3.scaleLinear().domain(agg_domain).range([110, 10]);

	// Join 1: a group per variety, moved to its column.
	const columns = svg0.selectAll('cols').data(nested_data).enter().append('g');
	columns.attr('transform', col => `translate(${x_scale(col.key)},0)`);

	// Join 2: a group per site inside each column, moved to its row.
	const cells = columns.selectAll('rows').data(col => col.values).enter().append('g');
	cells.attr('transform', cell => `translate(0,${y_scale(cell.key)})`);

	// Join 3: a rectangle per year inside each cell.
	const year_rects = cells.selectAll('groups').data(cell => cell.values).enter().append('rect');
	year_rects
		.attr('x', leaf => x_1_scale(leaf.key))
		.attr('width', x_1_scale.bandwidth())
		.attr('height', y_scale.bandwidth())
		.attr('fill', leaf => d3.hcl(10, chroma_scale(leaf.value), lum_scale(leaf.value)));

	create_axes_example0(svg0, x_range_pad, height - y_range_pad, x_scale, y_scale, false);
}



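Note the impact of two choices made in the code above: ordering the varieties and sites by their mean yield (rather than by their order of appearance in the data), and increasing the padding between cells so that each pair of bars reads as a single unit.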

As a further alternative, we can change the visual encoding to length:

// Load the barley dataset, store it in the shared `barley_data` variable that
// plot_it_4 reads, and then draw the length-encoded grid.
d3.json('barley.json')
	.then(function(data)  {
		barley_data = data;
		plot_it_4();
	})
	.catch(function(error)  {
		// Report a failed load (or an error thrown while plotting) instead of
		// leaving the promise rejection unhandled.
		console.error('barley bar grid (plot_it_4) failed:', error);
	});

/**
 * Grid of small bar charts: each (variety, site) cell holds one bar per year
 * whose length encodes the mean yield, drawn over a light background rect.
 * Varieties and sites are ordered by ascending overall mean yield.
 * Reads the shared `barley_data` array and draws into the '#svg0-4' element.
 */
function plot_it_4()  {
	const svg0 = d3.select('#svg0-4');
	const x_range_pad = 100;
	const y_range_pad = 100;
	const height = svg0.attr('height');

	const mean_yield = rows => d3.mean(rows, row => row.yield);

	// Running extent of the per-leaf means; updated as a side effect of the rollup.
	let min_agg = 1e14;
	let max_agg = -1e14;

	// variety -> site -> year -> mean yield
	const nested_data = d3.nest()
		.key(row => row.variety)
		.key(row => row.site)
		.key(row => row.year)
		.rollup(rows => {
			const mean_value = mean_yield(rows);
			max_agg = Math.max(max_agg, mean_value);
			min_agg = Math.min(min_agg, mean_value);
			return mean_value;
		})
		.entries(barley_data);

	// Distinct values of one attribute, ordered by ascending mean yield of their rows.
	const keys_by_mean_yield = function(key_of)  {
		const groups = d3.nest().key(key_of).rollup(mean_yield).entries(barley_data);
		groups.sort((a, b) => a.value - b.value);
		return groups.map(group => group.key);
	};
	const unique_varieties = keys_by_mean_yield(row => row.variety);
	const unique_sites = keys_by_mean_yield(row => row.site);
	const unique_years = d3.set(barley_data, row => row.year).values();

	const x_scale = d3.scaleBand()
		.domain(unique_varieties)
		.range([x_range_pad, 2 * height])
		.paddingInner(0.15)
		.paddingOuter(0.05);
	const y_scale = d3.scaleBand()
		.domain(unique_sites)
		.range([0, height - y_range_pad])
		.paddingInner(0.15)
		.paddingOuter(0.05);
	// Inner scales local to a cell: years across, mean yield upward from the cell's bottom edge.
	const x_1_scale = d3.scaleBand()
		.domain(unique_years)
		.range([0, x_scale.bandwidth()])
		.paddingInner(0.05);
	const y_1_scale = d3.scaleLinear()
		.domain([min_agg, max_agg])
		.range([y_scale.bandwidth(), 0]);

	// Join 1: a group per variety, moved to its column.
	const columns = svg0.selectAll('cols').data(nested_data).enter().append('g');
	columns.attr('transform', col => `translate(${x_scale(col.key)},0)`);

	// Join 2: a group per site, moved to its row and tagged so it can be re-selected below.
	const cells = columns.selectAll('rows').data(col => col.values).enter().append('g');
	cells
		.attr('transform', cell => `translate(0,${y_scale(cell.key)})`)
		.attr('class', 'cell');

	// Join 3: a bar per year. Bars are measured from y_1_scale(min_agg), so the
	// smallest mean in the data gets a bar of height zero.
	const bar_floor = y_1_scale(min_agg);
	const bars = cells.selectAll('groups').data(cell => cell.values).enter().append('rect');
	bars
		.attr('x', leaf => x_1_scale(leaf.key))
		.attr('width', x_1_scale.bandwidth())
		.attr('y', leaf => y_1_scale(leaf.value))
		.attr('height', leaf => bar_floor - y_1_scale(leaf.value))
		.attr('fill', d3.hcl(10, 50, 50));

	// Light backdrop for every cell, lowered so it renders beneath the bars.
	const backdrops = svg0.selectAll('.cell').append('rect');
	backdrops
		.attr('width', x_scale.bandwidth())
		.attr('height', y_scale.bandwidth())
		.attr('fill', d3.hcl(0, 0, 90))
		.lower();

	create_axes_example0(svg0, x_range_pad, height - y_range_pad, x_scale, y_scale, false);
}



D3: Parallel Coordinates

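In creating a parallel coordinates plot, recall that our data consists of an “arbitrary” number of attributes that are of an ordered type. Parallel coordinates are quite different from the spatial arrangements we have thus far seen, as each attribute is to be mapped to a particular slice of space, e.g. a set of vertical slices. This somewhat unusual arrangement also impacts our approach for implementing this type of visualization: each attribute needs its own scale for positioning values along its axis, a separate scale is needed to position the axes themselves, and each data item is drawn as a single polyline that passes through all of the axes.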

So with these considerations, let’s look at a minimal implementation of parallel coordinates:

// Load the NBA player table, pick the attributes to plot (in a random axis
// order), convert those columns from CSV strings to numbers, and then draw
// the parallel coordinates plot. `nba_data` and `selected_atts` are shared
// variables read by plot_nba.
d3.csv('nba_players.csv')
	.then(function(data)  {
		nba_data = data;
		selected_atts = ['Age','Block','Steal','Assist','Two Points','Three Points']
		// Shuffles in place, so the left-to-right axis order differs between page loads.
		d3.shuffle(selected_atts)
		nba_data.forEach(d => {
			selected_atts.forEach(att => {
				d[att] = +d[att];
			})
		})
		plot_nba();
	})
	.catch(function(error)  {
		// Report a failed load (or an error thrown while plotting) instead of
		// leaving the promise rejection unhandled.
		console.error('NBA parallel coordinates (plot_nba) failed:', error);
	});

/**
 * Minimal parallel coordinates plot: one vertical axis per entry of
 * `selected_atts`, and one translucent polyline per row of `nba_data`.
 * Reads the shared `nba_data` and `selected_atts` and draws into '#svg1'.
 */
function plot_nba()  {
	const svg1 = d3.select('#svg1');
	const x_range_pad = 40;
	const y_range_pad = 20;
	const width = svg1.attr('width');
	const height = svg1.attr('height');

	// Horizontal position of each attribute's axis.
	const pcp_scale_x = d3.scalePoint()
		.domain(selected_atts)
		.range([x_range_pad, width - x_range_pad]);

	// One vertical scale per attribute, in the same order as selected_atts;
	// each spans that attribute's own [min, max] over the data.
	const pcp_scale_y = selected_atts.map(function(att)  {
		const [lowest, highest] = d3.extent(nba_data, row => row[att]);
		return d3.scaleLinear()
			.domain([lowest, highest])
			.range([height - y_range_pad, y_range_pad]);
	});

	// A vertex carries its attribute name (for x) and its index (to pick the y scale).
	const line = d3.line()
		.x(vertex => pcp_scale_x(vertex.att))
		.y(vertex => pcp_scale_y[vertex.order](vertex.value));

	const vertices_for = function(row)  {
		return selected_atts.map((att, order) => ({att: att, order: order, value: row[att]}));
	};

	const paths = svg1.selectAll('empty').data(nba_data).enter().append('path');
	paths
		.attr('d', row => line(vertices_for(row)))
		.attr('fill', 'None')
		.attr('stroke', d3.hcl(30, 60, 75))
		.attr('stroke-width', 2)
		.attr('stroke-opacity', 0.12);

	create_axes_example1(svg1, x_range_pad, y_range_pad, pcp_scale_x, pcp_scale_y, selected_atts);
}