文章标题
- 1、安装sqlserver数据库
- 2、下载kettle
- 3、业务分析
- 4、详细流程
- (1)转换1:获取sqlserver所有表格名字,将记录复制到结果
- (2)转换2:从结果设置变量
- (3)转换3:生成建表的DDL
- (4)转换4:迁移数据到mysql
- (5)工作流1:单表同步流程
- (6)工作流2:主流程
title: ETL-使用kettle批量复制sqlserver数据到mysql数据库
date: 2023-11-21 10:21:53
tags: ETL
cover: https://gulimall-ayu.oss-cn-chengdu.aliyuncs.com/blog/QQ%E5%9B%BE%E7%89%8720231121133353.png
1、安装sqlserver数据库
#安装之前我们准备好挂载文件夹:/opt/module/mssql
#并且修改文件夹所有者: chown -R 10001:0 ./opt/module/mssqldocker run \--name mssql \-e 'ACCEPT_EULA=Y' \-e 'MSSQL_SA_PASSWORD=XLYqwe123' \-p 1433:1433 \-v /opt/module/mssql:/var/opt/mssql \--restart=always \-d mcr.microsoft.com/mssql/server:2017-latest#进入容器命令:
docker exec -it 容器id /bin/bash#登录命令:/opt/mssql-tools/bin/sqlcmd -S localhost -U SA -P "XLYqwe123"#然后我们就可以创建一些表用来模拟传输数据
2、下载kettle
kettle在外网下载起来非常慢,这是我使用的版本
链接:https://pan.baidu.com/s/142eHrLx5AjmGxwCEbabfCw?pwd=uqmh
提取码:uqmh
3、业务分析
现在一共是四百多张表在sqlserver里面,直接用navicat的传输工具要报错,
在kettle里面是这样解决的,先根据sqlserver的表生成mysql的建表语句(ddl),然后
在将sqlserver的表格数据插入过去。
4、详细流程
流程完全是copy的这个文章:
https://blog.csdn.net/xuyang2059/article/details/124431556?spm=1001.2014.3001.5502总共涉及到两个工作流,4个转换算子
(1)转换1:获取sqlserver所有表格名字,将记录复制到结果
SELECT TABLE_NAME
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_TYPE = 'BASE TABLE'
ORDER BY TABLE_NAME;
(2)转换2:从结果设置变量
(3)转换3:生成建表的DDL
declare @table varchar(100) = '${TNAME}'
declare @sql table(s varchar(1000), id int identity)
-- 创建语句
insert into @sql(s) values ('create table if not exists ${TNAME} (')-- 获取注释
SELECT A.name AS table_name,B.name AS column_name,C.value AS column_description
into #columnsproperties
FROM sys.tables AINNER JOIN sys.columns B ON B.object_id = A.object_idLEFT JOIN sys.extended_properties C ON C.major_id = B.object_id AND C.minor_id = B.column_id
WHERE A.name = @table-- 获取列的列表,拼接语句
insert into @sql(s)
select ' `' + replace(lower(a.column_name),' ','') + '` ' +case data_typewhen 'datetime2' then 'datetime'when 'datetimeoffset' then 'datetime'when 'smalldatetime' then 'datetime'when 'money' then 'decimal(19,4)'when 'smallmoney' then 'decimal(19,4)'when 'nchar' then 'varchar'when 'ntext' then 'text'when 'nvarchar' then 'varchar'when 'char' then 'varchar'when 'real' then 'float'when 'numeric' then 'decimal'when 'uniqueidentifier' then 'varchar(40)'when 'xml' then 'text'when 'image' then 'longblob'else data_typeend +coalesce(case data_type when 'image' then '' -- xml格式转成text也不能在后面加长度否则text会自动变成tinytextwhen 'xml' then ''else '(' + cast(abs(character_maximum_length) as varchar) + ')' end, '') + ' ' +(case when IS_NULLABLE = 'NO' then 'NOT ' else '' end) + 'NULL ' +replace(replace(coalesce('DEFAULT ' + COLUMN_DEFAULT, ''), '(', ''), ')', '') +casewhen isnull(convert(varchar, b.column_description), '') <> ''then '/**' + isnull(convert(varchar, b.column_description), '') + '**/,'else ',' end
from INFORMATION_SCHEMA.COLUMNS aleft join #columnsproperties bon convert(varchar, a.column_name) = convert(varchar, b.column_name)
where a.table_name = @table
order by ordinal_position-- etl日期字段
insert into @sql(s)
values (' etl_date datetime NOT NULL ,')-- 主键
declare @pkname varchar(100)
select @pkname = constraint_name
from INFORMATION_SCHEMA.TABLE_CONSTRAINTS
where table_name = @tableand constraint_type = 'PRIMARY KEY'
if (@pkname is not null)begininsert into @sql(s) values (' PRIMARY KEY (')insert into @sql(s)select ' ' + COLUMN_NAME + ','from INFORMATION_SCHEMA.KEY_COLUMN_USAGEwhere constraint_name = @pknameorder by ordinal_position-- 去除尾部多余的字符update @sql set s=left(s, len(s) - 1) where id = @@identityinsert into @sql(s) values (' )')end
elsebegin-- 去除尾部多余的字符update @sql set s=left(s, len(s) - 1) where id = @@identityend
-- 继续拼接
insert into @sql(s)
values (')')drop table #columnsproperties-- 输出结果
select stuff((select CHAR(10) + s from @sql order by id for xml path('')), 1, 1, '') as ddl
(4)转换4:迁移数据到mysql
(5)工作流1:单表同步流程
(6)工作流2:主流程