Last Updated on Friday, 07 December 2012
Why?
We want to scramble sensitive data that we send to suppliers for support or analysis. With inspiration from: "Obfuscating your SQL Server Data" by John Magnabosco but tweaked for our purposes. NOTE that the following has only ever been run on development environments and I would not recommend running this on a production system as I have not tested the performance and database load.
- Before:
- StudentID StudentName DateOfBirth
- ----------- -------------------- -------------
- 1 John Smith 1990-03-21
- 2 Fred Bloggs 1988-11-02
- 3 Another User 1985-07-11
- 4 Yet Another User 1977-06-25
- After:
- StudentID StudentName DateOfBirth
- ----------- -------------------- -------------
- 1 mStnoh iJh 1926-01-02
- 2 lgreg BFdos 1969-03-14
- 3 onrUest ehAr 1968-05-05
- 4 otentre AhYe Usr 1964-08-08
Precursor
You will need to be able to create the following view to generate random numbers on SQL Server.
- -- Used to reference RAND with in a function
- CREATE VIEW dbo.vwRandom
- AS
- SELECT RAND() as RandomValue;
- GO
The Function
Again you need permission to create this function. Don't forget to GRANT permission to execute this function for whatever user/system account that will run it.
- -- Drop the function if it already exists
- IF OBJECT_ID ('ufn_DataJumble', 'FN') IS NOT NULL
- DROP FUNCTION ufn_DataJumble ;
- GO
- -- Create user defined function
- CREATE FUNCTION ufn_DataJumble
- (
- @OrigVal varchar(max)
- )
- RETURNS varchar(max)
- WITH ENCRYPTION
- AS
- BEGIN
- -- Variables used
- DECLARE @NewVal varchar(max);
- DECLARE @OrigLen int;
- DECLARE @CurrLen int;
- DECLARE @LoopCt int;
- DECLARE @Rand int;
- -- for dates/times
- DECLARE @MinYear int;
- DECLARE @MaxYear int;
- DECLARE @RandYear int;
- DECLARE @RandMonth int;
- DECLARE @RandDay int;
- DECLARE @RandHour int;
- DECLARE @RandMinute int;
- DECLARE @RandSeconds int;
- DECLARE @RandMSeconds int;
- DECLARE @DateVal date;
- DECLARE @TimeVal time;
- DECLARE @DateMatch int;
- DECLARE @TimeMatch int;
- DECLARE @ColumnTypeIsDate int;
- -- Format given value (prevents CHAR datatype issue)
- SET @OrigVal = LTRIM(RTRIM(@OrigVal));
- -- Set variable default values
- SET @NewVal = '';
- SET @OrigLen = DATALENGTH(@OrigVal);
- SET @CurrLen = @OrigLen;
- SET @LoopCt = 1;
- SET @DateVal = ''; -- 1900-01-01
- SET @TimeVal = ''; -- 00:00:00.000
- SET @DateMatch = 0;
- SET @TimeMatch = 0;
- SET @ColumnTypeIsDate = 0;
- -- Determine if Given Value is date
- IF @OrigVal IS NULL
- RETURN NULL;
- -- Determine if Given Value is date
- IF ISDATE(@OrigVal)=1
- SET @ColumnTypeIsDate = 1;
- -- Determine if Given Value is date (ISDATE does not recognize CAST AS DATE)
- IF @OrigLen=10
- AND (LEN(@OrigVal) - LEN(REPLACE(@OrigVal, '-', '')))=2
- AND ISNUMERIC(SUBSTRING(@OrigVal, 1, 4))=1
- AND ISNUMERIC(SUBSTRING(@OrigVal, 6, 2))=1
- AND ISNUMERIC(SUBSTRING(@OrigVal, 9, 2))=1
- SET @ColumnTypeIsDate = 1;
- -- Determine if this is a date/time value
- IF @ColumnTypeIsDate=1
- BEGIN
- -- DETERMINE IF THIS FIELD CONTAINS A DATE VALUE
- SET @DateMatch =
- CASE
- WHEN CAST(CAST(@OrigVal AS date) AS VARCHAR(10))<>'1900-01-01' THEN 1
- ELSE 0
- END;
- -- DETERMINE IF THIS FIELD CONTAINS A TIME VALUE
- SET @TimeMatch =
- CASE
- WHEN CAST(CAST(@OrigVal AS time) AS VARCHAR(10))<>'00:00:00.000' THEN 1
- ELSE 0
- END;
- -- DETERMINE TIME VALUE IF TIME WAS IN GIVEN VALUE
- IF @DateMatch=1
- BEGIN
- -- SET YEAR RANGE (1912 - 1994: for Birthdate specifically: Older than 18 but younger than 100)
- SET @MinYear = DATEPART(YEAR,GETDATE())-100; -- 100 years ago from today
- SET @MaxYear = DATEPART(YEAR,GETDATE())-18; -- 18 years ago from today
- -- GET RANDOM DATE VALUES
- SELECT @RandYear = ((@MinYear + 1) - @MaxYear) * RandomValue + @MaxYear FROM dbo.vwRandom;
- SELECT @RandMonth = (1 - 12) * RandomValue + 12 FROM dbo.vwRandom;
- SELECT @RandDay = (1 - 28) * RandomValue + 28 FROM dbo.vwRandom; -- prevent invalid dates (specifically 31 days in feb)
- SET @DateVal =
- CAST(@RandYear AS CHAR(4)) + '-'
- + STUFF(@RandMonth, 1, 0, REPLICATE('0', 2 - LEN(@RandMonth))) + '-'
- + STUFF(@RandDay, 1, 0, REPLICATE('0', 2 - LEN(@RandDay)));
- END
- -- DETERMINE TIME VALUE IF TIME WAS IN GIVEN VALUE
- IF @TimeMatch=1
- BEGIN
- -- SET RANDOM TIME VALUE (include Milliseconds)
- SELECT @RandHour = (0 - 23) * RandomValue + 23 FROM dbo.vwRandom;
- SELECT @RandMinute = (0 - 59) * RandomValue + 59 FROM dbo.vwRandom;
- SELECT @RandSeconds = (0 - 59) * RandomValue + 59 FROM dbo.vwRandom;
- SELECT @RandMSeconds = (0 - 999) * RandomValue + 999 FROM dbo.vwRandom;
- SET @TimeVal =
- STUFF(@RandHour, 1, 0, REPLICATE('0', 2 - LEN(@RandHour)))
- + ':' + STUFF(@RandMinute, 1, 0, REPLICATE('0', 2 - LEN(@RandMinute)))
- + ':' + STUFF(@RandSeconds, 1, 0, REPLICATE('0', 2 - LEN(@RandSeconds)))
- + '.' + STUFF(@RandMSeconds, 1, 0, REPLICATE('0', 3 - LEN(@RandMSeconds)));
- SET @TimeVal = CONVERT(time, @TimeVal, 14);
- END
- -- DETERMINE WHICH DATE PARTS WERE SUBMITTED
- IF @TimeMatch=1 AND @DateMatch=0
- SET @NewVal = @TimeVal;
- ELSE IF @TimeMatch=0 AND @DateMatch=1
- SET @NewVal = @DateVal;
- ELSE IF @TimeMatch=1 AND @DateMatch=1
- SET @NewVal = CAST(@DateVal AS varchar(10)) + ' ' + CAST(@TimeVal AS varchar(16));
- ELSE
- SET @NewVal = '';
- -- REDUCE THIS STRING TO THE SAME LENGTH AS SUBMITTED VALUE
- SET @NewVal = SUBSTRING(LTRIM(RTRIM(CAST(@NewVal AS VARCHAR(30)))), 1, LEN(@OrigVal));
- END
- ELSE
- BEGIN
- -- Loop through the characters passed
- WHILE @LoopCt <= @OrigLen
- BEGIN
- -- Current length of possible characters
- SET @CurrLen = DATALENGTH(@OrigVal);
- -- Random position of character to use
- SELECT @Rand = Convert(int,(((1) - @CurrLen) * RandomValue + @CurrLen)) FROM dbo.vwRandom;
- -- Assembles the value to be returned
- SET @NewVal = @NewVal + SUBSTRING(@OrigVal,@Rand,1);
- -- Removes the character from available options
- SET @OrigVal = SUBSTRING(@OrigVal,1,(@Rand-1))+SUBSTRING(@OrigVal,(@Rand+1),LEN(@OrigVal));
- -- Advance the loop
- SET @LoopCt = @LoopCt + 1;
- END
- -- Format new value
- SET @NewVal = LTRIM(RTRIM(@NewVal));
- END
- -- Return new value
- RETURN @NewVal;
- END
- GO
Usage
- SELECT
- Common.ufn_DataJumble(Forenames) AS Forenames
- , Common.ufn_DataJumble(Surname) AS Surnames
- , Common.ufn_DataJumble(AccountNumber) AS AccountNumber
- , Common.ufn_DataJumble(DateOfBirth) AS DateOfBirth
- FROM
- mySchema.mySensitiveTable;
- UPDATE Student.PersonExtract
- SET Surname=Common.ufn_DataJumble(Surname);
Known Issues (fixed in above function)
- "Invalid length parameter passed to the LEFT or SUBSTRING function"
- Add LTRIM(RTRIM(Forenames)) if Forenames is a char datatype.
- DateOfBirth is interpreted as string (isdate does not recognize datatype date)
- CAST(Common.ufn_DataJumble(CAST(DateOfBirth AS DATETIME)) AS DATE) AS DateOfBirth
Still To Do
Create stored procedure that will take any table and return the table with scrambled data:
- Needs to determine datatype
- Cannot exceed allowed length (shouldn't happen if simply scrambling data)
- Function needs to be reduced in size
Stored Procedure (using this function)
So that I can give it any table name and it will maintain structure and data types:
- USE UnitTestDataStaging;
- GO
- -- Drop Stored Procedure if already exists
- IF OBJECT_ID ( 'Common.ufn_DataJumble', 'P' ) IS NOT NULL
- DROP PROCEDURE Common.ufn_DataJumble;
- GO
- -- Create Stored Procedure
- CREATE PROCEDURE Common.ufn_DataJumble
- @SchemaTableName nvarchar(100)
- AS
- BEGIN
- -- Variables used
- DECLARE @TableName varchar(50);
- DECLARE @SqlToExecute nvarchar(4000);
- -- Set variable default values
- SET @TableName = SUBSTRING(@SchemaTableName, CHARINDEX('.', @SchemaTableName)+1, LEN(@SchemaTableName));
- SET @SqlToExecute = '';
- -- Generate Select Query to extract data from given table
- SET @SqlToExecute='SELECT ' + CHAR(10) + STUFF(
- (
- -- Code to concatenate column names and data types into one string
- SELECT
- ',Common.ufn_DataJumble(ltrim(rtrim(' + COLUMN_NAME+ '))) AS ' + COLUMN_NAME + CHAR(10)
- FROM
- INFORMATION_SCHEMA.COLUMNS
- WHERE
- TABLE_NAME = @TableName
- ORDER BY ORDINAL_POSITION
- FOR XML PATH ('')
- ),1,1,''
- ) + ' FROM ' + @SchemaTableName;
- EXEC(@SqlToExecute);
- END
- GO
- -- execute stored procedure on a table
- EXECUTE Common.ufn_DataJumble 'dbo.myTable';
Latest Posts
-
Joes Revolver Map (JRM)
-
Fri 17-May-13
Hmmm... Sounds like a problem with the identifier. Was it working before and has there been a change ...
-
Fri 17-May-13
Hello Joel: Yes, I do have it published on all pages of the site. I just went back to Revolver maps to ...
-
Fri 17-May-13
Hi Bill, From the developers of RevolverMaps, "the module would need to be published on every page ...
-
Fri 17-May-13
Hi Bill, I'll investigate further as you're not the first to say this happens. In the meantime, simply ...
-
Fri 17-May-13
Its a great extension. But when I set it up I only show my presence on the 3D map and no other visitors ...
-
Fri 17-May-13

