Window functions: referencing an indeterminate number of rows above

Problem:

You have a table of data where you want rows with a dummy value in a field to display the value in the row above.  You could use lag – but what if the row above had a dummy value too?  In that case you want it to look at the next row up.  And so on until it found the value you wanted…

Solution:

Utilise the aggregates with window functions to identify groups so that the non-dummy values above persist down the rows.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
IF OBJECT_ID(N'tempdb..#CreditHistory') IS NOT NULL DROP TABLE #CreditHistory;

CREATE TABLE #CreditHistory(
	 id				INT			IDENTITY(1,1)	NOT NULL
	,[name]			VARCHAR(20)					NOT NULL	
	,[status]		CHAR(1)						NULL
	,[dtRecorded]	DATE						NOT NULL
)


INSERT INTO #CreditHistory([name],[status],[dtRecorded])
VALUES  ('dave',	'5', '20170101')
	   ,('dave',	'5', '20170201')
	   ,('dave',	'5', '20170301')
	   ,('dave',	'4', '20170401')
	   ,('dave',	'I', '20170501')
	   ,('dave',	'I', '20170601')
	   ,('dave',	'I', '20170701')
	   ,('dave',	'I', '20170801')
	   ,('dave',	'0', '20170901')
	   ,('dave',	'I', '20171001')
	   ,('dave',	'1', '20171101')
	   ,('dave',	'2', '20171201')
	   ,('mike',	'I', '20170101')
	   ,('mike',	'1', '20170201')
	   ,('mike',	'2', '20170301')
	   ,('mike',	'3', '20170401')
	   ,('mike',	'4', '20170501')
	   ,('mike',	'I', '20170601')
	   ,('mike',	'I', '20170701')
	   ,('mike',	'1', '20170801')
	   ,('mike',	'2', '20170901')
	   ,('mike',	'I', '20171001')
	   ,('mike',	'I', '20171101')
	   ,('mike',	'0', '20171201')
	   
/*=================================================================================================
STEP 1:
		Use the CTE to identify the groups that each record falls into.
=================================================================================================*/
;WITH CTE_GROUPED AS (
	SELECT 
		 [id]
		,[name]
		,[status] 
		,[dtRecorded]
		 -- COUNT: Only counts non null values!
		,COUNT(
				-- CASE: returns null when you want it to be grouped with values above
				CASE WHEN [status] = 'I' THEN NULL ELSE [status] END
		 ) 
		 OVER (PARTITION BY [name] ORDER BY [name], [dtRecorded]) AS [grouping]

	FROM #CreditHistory
)


/*=================================================================================================
STEP 2:
		Return the results with the new status value.
		The [grouping] field is used to limit the max of the status- which is the newStatus
=================================================================================================*/
SELECT 
	 [id]
	,[name]
	,[status]
	,[dtRecorded]
	,[grouping] 

	 -- MAX: Discards null values
	,MAX(CASE WHEN [status] = 'I' THEN NULL ELSE [status] END) 
	 -- Add [grouping] to the partition clause to limit the max calculation 
	 OVER (PARTITION BY [name],[grouping] ORDER BY [name],[dtRecorded]) AS [newStatus]

FROM CTE_GROUPED

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google+ photo

You are commenting using your Google+ account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s