开发有个语句执行了超过2个小时没有结果,询问我到底为什么执行这么久。
语句格式如下select * from tgt1 a left join tgt2 b on a.id=b.id and a.id>=6 order by a.id; 这个是典型的理解错误,本意是要对a表进行过滤后进行[]left join]的,我们来看看到底什么是真正的[left join]。
[gpadmin@mdw ~]$ psql bigdatagp
psql (8.2.15)
Type "help" for help.
bigdatagp=# drop table tgt1;
DROP TABLE
bigdatagp=# drop table tgt2;
DROP TABLE
bigdatagp=# explain select t1.telnumber,t2.ua,t2.url,t1.apply_name,t2.apply_name from gpbase.tb_csv_gn_ip_session t1 ,gpbase.tb_csv_gn_http_session_hw t2 where t1.bigdatagp=# \q bigdatagp=# create table tgt1(id int, name varchar(20)); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE TABLE
bigdatagp=# create table tgt2(id int, name varchar(20));
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE TABLE
bigdatagp=# insert into tgt1 select generate_series(1,3),('a','b');
ERROR: column "name" is of type character varying but expression is of type record
HINT: You will need to rewrite or cast the expression.
bigdatagp=# insert into tgt1 select generate_series(1,5),generate_series(1,5)||'a';
INSERT 0 5
bigdatagp=# insert into tgt2 select generate_series(1,2),generate_series(1,2)||'a';
INSERT 0 2
bigdatagp=# select * from tgt1;
id | name
----+------
2 | 2a
4 | 4a
1 | 1a
3 | 3a
5 | 5a
(5 rows)
bigdatagp=# select * from tgt1 order by id;
id | name
----+------
1 | 1a
2 | 2a
3 | 3a
4 | 4a
5 | 5a
(5 rows)
bigdatagp=# select * from tgt2 order by id;
id | name
----+------
1 | 1a
2 | 2a
(2 rows)
bigdatagp=# select * from tgt1 a left join tgt2 b on a.id=b.id;
id | name | id | name
----+------+----+------
3 | 3a | |
5 | 5a | |
1 | 1a | 1 | 1a
2 | 2a | 2 | 2a
4 | 4a | |
(5 rows)
bigdatagp=# select * from tgt1 a left join tgt2 b on a.id=b.id order by a.id;
id | name | id | name
----+------+----+------
1 | 1a | 1 | 1a
2 | 2a | 2 | 2a
3 | 3a | |
4 | 4a | |
5 | 5a | |
(5 rows)
bigdatagp=# select * from tgt1 a left join tgt2 b on a.id=b.id where id>
=3 order by a.id;
ERROR: column reference "id" is ambiguous
LINE 1: ...* from tgt1 a left join tgt2 b on a.id=b.id where id>=3 orde...
^
bigdatagp=# select * from tgt1 a left join tgt2 b on a.id=b.id where a.id>=3 order by a.id;
id | name | id | name
----+------+----+------
3 | 3a | |
4 | 4a | |
5 | 5a | |
(3 rows)
bigdatagp=# select * from tgt1 a left join tgt2 b on a.id=b.id and a.id>=3 order by a.id;
id | name | id | name
----+------+----+------
1 | 1a | |
2 | 2a | |
3 | 3a | |
4 | 4a | |
5 | 5a | |
(5 rows)
bigdatagp=# select * from tgt1 a left join tgt2 b on a.id=b.id where a.id>=6 order by a.id;
id | name | id | name
----+------+----+------
(0 rows)
bigdatagp=# select * from tgt1 a left join tgt2 b on a.id=b.id and a.id>=6 order by a.id;
id | name | id | name
----+-----