# Tuesday, 25 October 2011

Character based data types, especially the var types varchar and nvarchar allow to declare a really big amount and just use a tiny bit and it doesn’t even hurt in SQL Server. However, it may in the application. Today I was working with a customer for the implementation of some third party software that would be doing analysis on the data. It would load the really over-dimensioned (n)varchar types in something like a (n)char.... you get the picture, huge amounts of wasted space. So from the databases side, a bit of restructuring was desirable. To see how much we can actually save, I created query to report on the character usage;

CREATE TABLE ##char_columns (
[SCHEMA]
VARCHAR(128) NULL,
[TABLE]
VARCHAR(128) NULL,

[COLUMN]
VARCHAR(128) NULL,

[data_type]
VARCHAR(128) NULL,

[max_length]
INT NULL,

[is_replicated]
BIT NULL,

[actual_max_length]
INT NULL,

[row_count]
BIGINT NULL,

[not_null_count]
BIGINT NULL)

INSERT
##char_columns

SELECT
s.name AS 'schema'
,
t
.name AS
'table'
,
c
.name AS
'column'
,
y
.name AS
'data_type'
,
c
.max_length
,
c
.is_replicated
,
NULL
,
NULL
,
NULL

FROM
sys.tables t

INNER
JOIN sys.schemas s ON t.schema_id = s.schema_id

INNER
JOIN sys.columns c ON t.object_id = c.object_id

INNER
JOIN sys.types y ON c.system_type_id = y.system_type_id

WHERE
t.type = 'U'
AND y.name IN ('varchar','char','nchar','nvarchar')
WHILE
(SELECT COUNT(*) FROM ##char_columns WHERE actual_max_length IS NULL) > 0

BEGIN
TRY

DECLARE
@sql NVARCHAR(4000)
, @actual_max_length INT , @column VARCHAR(128) , @schema VARCHAR(128) , @table VARCHAR(128)
SELECT
TOP 1 @schema = [SCHEMA], @table = [TABLE], @column = [COLUMN]
FROM ##char_columns

WHERE
actual_max_length IS NULL

-- Set the actual_max_length to -1 if a column has only null values or no rows.

SELECT @sql = 'UPDATE ##char_columns
SET [actual_max_length] = (SELECT ISNULL(MAX(LEN([' + @column + '])),-1) FROM [' + @schema + '].[' + @table + '])
,
      [row_count] = (SELECT COUNT_BIG(*) FROM [' + @schema + '].[' + @table + '])
,
      [not_null_count] = (SELECT COUNT_BIG([' + @column + ']) FROM [' + @schema + '].[' + @table + ']) 
WHERE [schema] = ''' + @schema + '''
 
      AND [table] = ''' + @table + '''
 
      AND [column] = ''' + @column + ''''
EXEC
(@sql)

END
TRY

BEGIN
CATCH

DECLARE
@err_num NVARCHAR(20), @err_msg NVARCHAR(2048), @full_msg NVARCHAR(2048)

SELECT
@err_num = ERROR_NUMBER(), @err_msg = ERROR_MESSAGE()
SELECT @full_msg = 'ERROR: ' + @err_num + ' DESCR: ' + @err_msg + ' QUERY: ' + @sql

RAISERROR
(@full_msg,16,0)

BREAK

END
CATCH

SELECT
* FROM ##char_columns

DROP
TABLE ##char_columns

Be aware that varchar(max) and nvarchar(max) will give a max_length of –1 and that actual_max_length is –1 if no rows exist in the table or the column only contains NULLs (refer to the row_count and not_null_count to determine the situation).