目录
- 1. 解题思路
- 2. 具体解析实现
- 3. 单元测试
用例:有一段sql语句,我们需要从中截取出所有字段部分,以便进行后续的类型推断或者别名字段抽取定义,请给出此解析方法。
想来很简单吧,因为 sql 中的字段列表,使用方式有限,比如 a as b, a, a b…
1. 解题思路
如果不想做复杂处理,最容易想到的,就是直接用某个特征做分割即可。比如,先截取出 字段列表部分,然后再用逗号’,’ 分割,就可以得到一个个的字段了。然后再要细分,其实只需要用 as 进行分割就可以了。
看起来好像可行,但是存在许多漏洞,首先,这里面有太多的假设:各种截取部分要求必须符合要求,必须没有多余的逗号,必须要有as 等等。这明显不符合要求了。
其二,我们可以换一种转换方式。比如先截取到field部分,然后先以 as 分割,再以逗号分割,然后取最后一个词作为field。
看起来好像更差了,截取到哪里已经完全不知道了。即原文已经被破坏殆尽,而且同样要求要有 as 转换标签,而且对于函数觊觎有 as 的场景,就完全错误了。
其三,最好还是自行一个个单词地解析,field 字段无外乎几种情况,1. 普通字段如 select a; 2. 带as的普通字段如 select a as b; 3. 带函数的字段如 select coalesce(a, b); 4. 带函数且带as的字段如 select coalesce(a, b) ab; 5. 函数内带as的字段如 select cast(a as string) b; … 我们只需依次枚举对应的情况,就可以将字段解析出来了。
看起来是个不错的想法。但是具体实现如何?
2. 具体解析实现
主要分两个部分,1. 需要定义一个解析后的结果数据结构,以便清晰描述字段信息; 2. 分词解析sql并以结构体返回;
我们先来看看整个算法核心:
/**
* 功能描述: 简单sql字段解析器
*
* 样例如1:
* select coalesce(t1.xno, t2.xno, t3.xno) as xno,
* case when t1.no is not null then 1 else null end as xxk001,
* case when t2.no is not null then 1 else null end as xxk200,
* case when t3.xno is not null then 1 else null end as xx3200
* from xxk001 t1
* full join xxkj100 t2 on t1.xno = t2.xno
* full join xxkj200 t3 on t1.xno = t3.xno;
*
* 样例如2:
* select cast(a as string) as b from ccc;
*
* 样例如3:
* with a as(select cus,x1 from b1), b as (select cus,x2 from b2)
* select a.cus as a_cus from a join b on a.cus=b.cus where xxx;
*
* 样例如4:
* select a.xno,b.xx from a_tb as a join b_tb as b on a.id = b.id
*
* 样例如5:
* select cast \t(a as string) a_str, cc (a as double) a_double from x
*
*/
public class simplesqlfieldparser {
/**
* 解析一段次标签sql 中的字段列表
*
* @param sql 原始sql, 需如 select xx from xxx join ... 格式
* @return 字段列表
*/
public static list<selectfieldclausedescriptor> parse(string sql) {
string columnpart = adaptfieldpartsql(sql);
int deep = 0;
list<stringbuilder> fieldtokenswap = new arraylist<>();
stringbuilder currenttokenbuilder = new stringbuilder();
list<selectfieldclausedescriptor> fieldlist = new arraylist<>();
fieldtokenswap.add(currenttokenbuilder);
int len = columnpart.length();
char[] columnpartchars = columnpart.tochararray();
for(int i = 0; i < len; i++) {
// 空格忽略,换行忽略,tab忽略
// 字符串相接
// 左(号入栈,++deep;
// 右)号出栈,--deep;
// deep>0 忽略所有其他直接拼接
// as 则取下一个值为fieldname
// case 则直接取到end为止;
//,号则重置token,构建结果集
char currentchar = columnpartchars[i];
switch (currentchar) {
case '(':
++deep;
currenttokenbuilder.append(currentchar);
break;
case ')':
--deep;
currenttokenbuilder.append(currentchar);
break;
case ',':
if(deep == 0) {
addnewfield(fieldlist, fieldtokenswap, true);
fieldtokenswap = new arraylist<>();
currenttokenbuilder = new stringbuilder();
fieldtokenswap.add(currenttokenbuilder);
break;
}
currenttokenbuilder.append(currentchar);
break;
case ' ':
case '\t':
case '\r':
case '\n':
if(deep > 0) {
currenttokenbuilder.append(currentchar);
continue;
}
if(currenttokenbuilder.length() == 0) {
continue;
}
// original_name as --> alias
if(i + 1 < len) {
int j = i + 1;
// 收集连续的空格
stringbuilder spaceholder = new stringbuilder();
boolean isnextleftbracket = false;
do {
char nextchar = columnpart.charat(j++);
if(nextchar == ' ' || nextchar == '\t'
|| nextchar == '\r' || nextchar == '\n') {
spaceholder.append(nextchar);
continue;
}
if(nextchar == '(') {
isnextleftbracket = true;
}
break;
} while (j < len);
if(isnextleftbracket) {
currenttokenbuilder.append(currentchar);
}
if(spaceholder.length() > 0) {
currenttokenbuilder.append(spaceholder);
i += spaceholder.length();
}
if(isnextleftbracket) {
// continue next for, function begin
continue;
}
}
if(fieldtokenswap.size() == 1) {
if(fieldtokenswap.get(0).tostring().equalsignorecase("case")) {
string casewhenpart = commonutil.readsplitword(
columnpartchars, i, " ", "end");
currenttokenbuilder.append(casewhenpart);
if(casewhenpart.length() <= 0) {
throw new bizexception("语法错误,未找到case..when的结束符");
}
i += casewhenpart.length();
}
}
addnewfield(fieldlist, fieldtokenswap, false);
currenttokenbuilder = new stringbuilder();
fieldtokenswap.add(currenttokenbuilder);
break;
// 空格忽略
default:
currenttokenbuilder.append(currentchar);
break;
}
}
// 处理剩余尚未存储的字段信息
addnewfield(fieldlist, fieldtokenswap, true);
return fieldlist;
}
/**
* 新增一个字段描述
*
* @param fieldlist 字段容器
* @param fieldtokenswap 候选词
*/
private static void addnewfield(list<selectfieldclausedescriptor> fieldlist,
list<stringbuilder> fieldtokenswap,
boolean forceadd) {
int ts = fieldtokenswap.size();
if(ts == 1 && forceadd) {
// db.original_name,
string fieldname = fieldtokenswap.get(0).tostring();
string alias = fieldname;
if(fieldname.contains(".")) {
alias = fieldname.substring(fieldname.lastindexof('.') + 1);
}
fieldlist.add(new selectfieldclausedescriptor(fieldname, alias));
return;
}
if(ts < 2) {
return;
}
if(ts == 2) {
// original_name alias,
if(fieldtokenswap.get(1).tostring().equalsignorecase("as")) {
return;
}
fieldlist.add(new selectfieldclausedescriptor(
fieldtokenswap.get(0).tostring(),
fieldtokenswap.get(1).tostring()));
}
else if(ts == 3) {
// original_name as alias,
fieldlist.add(new selectfieldclausedescriptor(
fieldtokenswap.get(0).tostring(),
fieldtokenswap.get(2).tostring()));
}
else {
throw new bizexception("字段语法解析错误,超过3个以字段描述信息:" + ts);
}
}
// 截取适配 field 字段信息部分
private static string adaptfieldpartsql(string fullsql) {
int start = fullsql.lastindexof("select ");
int end = fullsql.lastindexof(" from");
string columnpart = fullsql.substring(start + "select ".length(), end);
return columnpart.trim();
}
}
应该说是比较简单的,一个for, 一个 switch ,就搞定了。其他的,更多的是逻辑判定。
下面我们来看看字段描述类的写法,其实就是两个字段,源字段和别名。
/**
* 功能描述: sql字段描述 select 字段描述类
*
*/
public class selectfieldclausedescriptor {
private string fieldname;
private string alias;
public selectfieldclausedescriptor(string fieldname, string alias) {
this.fieldname = fieldname;
this.alias = alias;
}
public string getfieldname() {
return fieldname;
}
public string getalias() {
return alias;
}
@override
public boolean equals(object o) {
if (this == o) return true;
if (o == null || getclass() != o.getclass()) return false;
selectfieldclausedescriptor that = (selectfieldclausedescriptor) o;
return objects.equals(fieldname, that.fieldname) &&
objects.equals(alias, that.alias);
}
@override
public int hashcode() {
return objects.hash(fieldname, alias);
}
@override
public string tostring() {
return "selectfieldclausedescriptor{" +
"fieldname='" + fieldname + '\'' +
", alias='" + alias + '\'' +
'}';
}
}
它存在的意义,仅仅是为了使用方更方便取值,以为更进一步的解析提供了依据。
3. 单元测试
其实像写这种工具类,单元测试最是方便简单。因为最初的结果,我们早已预料,以测试驱动开发最合适不过了。而且,基本上一出现不符合预期的值时,很快速就定位问题了。
/**
* 功能描述: sql字段解析器测试
**/
public class simplesqlfieldparsertest {
@test
public void testparse() {
string sql;
list<selectfieldclausedescriptor> parsedfieldlist;
sql = "select coalesce(t1.xno, t2.xno, t3.xno) as xno,\n" +
" case when t1.xno is not null then 1 else null end as xxk001,\n" +
" case when t2.xno is not null then 1 else null end as xxk200,\n" +
" case when t3.xno is not null then 1 else null end as xx3200\n" +
" from xxk001 t1\n" +
" full join xxkj100 t2 on t1.xno = t2.xno\n" +
" full join xxkj200 t3 on t1.xno = t3.xno;";
parsedfieldlist = simplesqlfieldparser.parse(sql);
system.out.println("result:");
parsedfieldlist.foreach(system.out::println);
assert.assertequals("字段个数解析不正确",
4, parsedfieldlist.size());
assert.assertequals("字段别名解析不正确",
"xno", parsedfieldlist.get(0).getalias());
assert.assertequals("字段别名解析不正确",
"xx3200", parsedfieldlist.get(3).getalias());
sql = "select cast(a as string) as b from ccc;";
parsedfieldlist = simplesqlfieldparser.parse(sql);
system.out.println("result:");
parsedfieldlist.foreach(system.out::println);
assert.assertequals("字段个数解析不正确",
1, parsedfieldlist.size());
assert.assertequals("字段别名解析不正确",
"b", parsedfieldlist.get(0).getalias());
sql = "with a as(select cus,x1 from b1), b as (select cus,x2 from b2)\n" +
" select a.cus as a_cus, cast(a \nas string) as a_cus2, " +
"b.x2 b2 from a join b on a.cus=b.cus where xxx;";
parsedfieldlist = simplesqlfieldparser.parse(sql);
system.out.println("result:");
parsedfieldlist.foreach(system.out::println);
assert.assertequals("字段个数解析不正确",
3, parsedfieldlist.size());
assert.assertequals("字段别名解析不正确",
"a_cus", parsedfieldlist.get(0).getalias());
assert.assertequals("字段别名解析不正确",
"b2", parsedfieldlist.get(2).getalias());
sql = "select a.xno,b.xx,qqq from a_tb as a join b_tb as b on a.id = b.id";
parsedfieldlist = simplesqlfieldparser.parse(sql);
system.out.println("result:");
parsedfieldlist.foreach(system.out::println);
assert.assertequals("字段个数解析不正确",
3, parsedfieldlist.size());
assert.assertequals("字段别名解析不正确",
"xno", parsedfieldlist.get(0).getalias());
assert.assertequals("字段别名解析不正确",
"qqq", parsedfieldlist.get(2).getalias());
sql = "select cast (a.a_int as string) a_str, b.xx, coalesce \n( a, b, c) qqq from a_tb as a join b_tb as b on a.id = b.id";
parsedfieldlist = simplesqlfieldparser.parse(sql);
system.out.println("result:");
parsedfieldlist.foreach(system.out::println);
assert.assertequals("字段个数解析不正确",
3, parsedfieldlist.size());
assert.assertequals("字段别名解析不正确",
"a_str", parsedfieldlist.get(0).getalias());
assert.assertequals("字段原始名解析不正确",
"cast (a.a_int as string)", parsedfieldlist.get(0).getfieldname());
assert.assertequals("字段别名解析不正确",
"qqq", parsedfieldlist.get(2).getalias());
assert.assertequals("字段原始名解析不正确",
"coalesce \n( a, b, c)", parsedfieldlist.get(2).getfieldname());
}
}
至此,一个简单的字段解析器完成。小工具,供参考!
到此这篇关于sql字段解析器的实现示例的文章就介绍到这了,更多相关sql字段解析器内容请搜索www.887551.com以前的文章或继续浏览下面的相关文章希望大家以后多多支持www.887551.com!