|
Volume Testing
To volume test TextServer and to explore how well optimization
strategies performed when
accessing large tables, a standard SQL benchmark provided by the
Transaction Processing Performance Council was used.
This benchmark allows large tables to be rapidly generated, and provides a
suite of sample queries that can be applied against the resulting relations.
Each relational table was converted into a single table. However there is
nothing intrinsically wrong with storing multiple relational tables in a single
text.
The
generated TPDC data
was loaded into tables of the following sizes:
| Name
| Tuples
| Bytes
| |
REGION
|
5
|
1,425
| |
NATION
|
25
|
3,792
| |
SUPPLIER
|
100
|
19,326
| |
CUSTOMER
|
1,500
|
323,076
| |
PART
|
2,000
|
357,102
| |
PARTSUPP
|
8,000
|
1,443,121
| |
ORDER
|
15,000
|
2,572,401
| |
LINEITEM
|
60,175
|
13,520,499
|
This data when loaded as text occupies 63 MBytes of disk, and when indexed
requires in total approximately twice this disk storage. Significantly less
space would have been needed if we had used short GENID's, short numeric
values, and/or avoided including both encoded and unencoded representations
of internal values. This compression would potentially have improved
the performance of our software.
Here are very approximate comparisons of performance for the provided TPCD
sample queries. Click on the query name to see the query, and on the time
when highlighted to repeat execution of this query.
Commentary
The results presented above suggest that encoding relational data
directly within structured text documents is not only theoretically
possible, but reasonably efficient when compared to storage in
a modern relational data base system.
Appendix: Sample Queries
Not all samples queries could be executed without modification on
Oracle since the language accepted by Oracle does not conform to
the SQL2 standard. Dates had to be represented differently, interval
constants had to be removed and case statements had to be reworked
using Oracle's DECODE() function. Some SQL2 functions (such as extract)
needed to be reworked using equivalent Oracle constructs.
Return to statistics
Query 1
SELECT
L_RETURNFLAG, L_LINESTATUS, SUM(L_QUANTITY) AS SUM_QTY,
SUM(L_EXTENDEDPRICE) AS SUM_BASE_PRICE,
SUM(L_EXTENDEDPRICE*(1-L_DISCOUNT)) AS SUM_DISC_PRICE,
SUM(L_EXTENDEDPRICE*(1-L_DISCOUNT)*(1+L_TAX)) AS SUM_CHARGE,
AVG(L_QUANTITY) AS AVG_QTY, AVG(L_EXTENDEDPRICE) AS AVG_PRICE,
AVG(L_DISCOUNT) AS AVG_DISC, COUNT(*) AS COUNT_ORDERS
FROM LINEITEM
WHERE L_SHIPDATE <= DATE '1998-12-01' - INTERVAL '90' DAY
GROUP BY L_RETURNFLAG, L_LINESTATUS
ORDER BY L_RETURNFLAG, L_LINESTATUS;
Return to statistics
Query 2
SELECT
S_ACCTBAL, S_NAME, N_NAME, P_PARTKEY, P_MFGR, S_ADDRESS, S_PHONE, S_COMMENT
FROM PART P, SUPPLIER S, PARTSUPP PS, NATION, REGION
WHERE P_PARTKEY = PS_PARTKEY
AND S_SUPPKEY = PS_SUPPKEY
AND P_SIZE = 15 -- [size]
AND P_TYPE LIKE '%BRASS' -- [type]
AND S_NATIONKEY = N_NATIONKEY
AND N_REGIONKEY = R_REGIONKEY
AND R_NAME = 'EUROPE' -- [region]
AND PS_SUPPLYCOST = (SELECT MIN(PS_SUPPLYCOST)
FROM PARTSUPP PS1, SUPPLIER S1, NATION N1, REGION R1
WHERE P.P_PARTKEY = PS1.PS_PARTKEY
AND S1.S_SUPPKEY = PS1.PS_SUPPKEY
AND S1.S_NATIONKEY = N1.N_NATIONKEY
AND N1.N_REGIONKEY = R1.R_REGIONKEY
AND R1.R_NAME = 'EUROPE') -- [region]
ORDER BY S_ACCTBAL DESC, N_NAME, S_NAME, P_PARTKEY
Return to statistics
Query 3
SELECT
L_ORDERKEY, SUM(L_EXTENDEDPRICE*(1-L_DISCOUNT)) AS REVENUE,
O_ORDERDATE, O_SHIPPRIORITY
FROM CUSTOMER, ORDERS, LINEITEM
WHERE C_MKTSEGMENT = 'BUILDING' -- [segment]
AND C_CUSTKEY = O_CUSTKEY
AND L_ORDERKEY = O_ORDERKEY
AND O_ORDERDATE < DATE '1995-03-15' -- [date]
AND L_SHIPDATE > DATE '1995-03-15' -- [date]
GROUP BY L_ORDERKEY, O_ORDERDATE, O_SHIPPRIORITY
ORDER BY REVENUE DESC, O_ORDERDATE
Return to statistics
Query 4
SELECT
O_ORDERPRIORITY, COUNT(*) AS ORDER_COUNT
FROM ORDERS
WHERE O_ORDERDATE >= DATE '1993-07-01' -- [date]
AND O_ORDERDATE < DATE '1993-07-01' + INTERVAL '3' MONTH -- [date]
AND EXISTS(SELECT *
FROM LINEITEM
WHERE L_ORDERKEY = O_ORDERKEY
AND L_COMMITDATE < L_RECEIPTDATE
)
GROUP BY O_ORDERPRIORITY
ORDER BY O_ORDERPRIORITY
Return to statistics
Query 5
SELECT
N_NAME, SUM(L_EXTENDEDPRICE*(1-L_DISCOUNT)) AS REVENUE
FROM CUSTOMER, ORDERS, LINEITEM, SUPPLIER, NATION, REGION
WHERE C_CUSTKEY = O_CUSTKEY
AND O_ORDERKEY = L_ORDERKEY
AND L_SUPPKEY = S_SUPPKEY
AND C_NATIONKEY = S_NATIONKEY
AND S_NATIONKEY = N_NATIONKEY
AND N_REGIONKEY = R_REGIONKEY
AND R_NAME = 'ASIA'
AND O_ORDERDATE >= DATE '1994-01-01'
AND O_ORDERDATE < DATE '1994-01-01' + INTERVAL '1' YEAR
GROUP BY N_NAME
ORDER BY REVENUE DESC
Return to statistics
Query 6
SELECT
SUM(L_EXTENDEDPRICE*L_DISCOUNT) AS REVENUE
FROM LINEITEM
WHERE L_SHIPDATE >= DATE '1994-01-01'
AND L_SHIPDATE < DATE '1994-01-01' + INTERVAL '1' YEAR
AND L_DISCOUNT BETWEEN 0.06 - 0.01 AND 0.06 + 0.01
AND L_QUANTITY < 24
Return to statistics
Query 7
SELECT
SUPP_NATION, CUST_NATION, THE_YEAR, SUM(VOLUME) AS REVENUE
FROM (SELECT N1.N_NAME AS SUPP_NATION,
N2.N_NAME AS CUST_NATION, EXTRACT(YEAR FROM L_SHIPDATE) AS THE_YEAR,
L_EXTENDEDPRICE*(1-L_DISCOUNT) AS VOLUME
FROM SUPPLIER, LINEITEM, ORDERS, CUSTOMER, NATION N1, NATION N2
WHERE S_SUPPKEY = L_SUPPKEY
AND O_ORDERKEY = L_ORDERKEY
AND C_CUSTKEY = O_CUSTKEY
AND S_NATIONKEY = N1.N_NATIONKEY
AND C_NATIONKEY = N2.N_NATIONKEY
AND ((N1.N_NAME = 'FRANCE' AND N2.N_NAME = 'GERMANY')
OR (N1.N_NAME = 'GERMANY' AND N2.N_NAME = 'FRANCE'))
AND L_SHIPDATE BETWEEN DATE '1995-01-01' AND DATE '1996-12-31'
) AS SHIPPING
GROUP BY SUPP_NATION, CUST_NATION, THE_YEAR
ORDER BY SUPP_NATION, CUST_NATION, THE_YEAR
Return to statistics
Query 8
SELECT
THE_YEAR, SUM(CASE WHEN NATION = 'BRAZIL'
THEN VOLUME * 1000
ELSE 0
END) / SUM(VOLUME) AS MKT_SHARE
FROM (SELECT
EXTRACT(YEAR FROM O_ORDERDATE) AS THE_YEAR,
L_EXTENDEDPRICE * (1-L_DISCOUNT) AS VOLUME, N2.N_NAME AS NATION
FROM PART, SUPPLIER, LINEITEM, ORDERS, CUSTOMER, NATION N1,
NATION N2, REGION
WHERE P_PARTKEY = L_PARTKEY
AND S_SUPPKEY = L_SUPPKEY
AND L_ORDERKEY = O_ORDERKEY
AND O_CUSTKEY = C_CUSTKEY
AND C_NATIONKEY = N1.N_NATIONKEY
AND N1.N_REGIONKEY = R_REGIONKEY
AND R_NAME = 'AMERICA'
AND S_NATIONKEY = N2.N_NATIONKEY
AND O_ORDERDATE BETWEEN DATE '1995-01-01' AND DATE '1996-12-31'
AND P_TYPE = 'ECONOMY ANODIZED STEEL') ALL_NATIONS
GROUP BY THE_YEAR
ORDER BY THE_YEAR
Return to statistics
Query 9
SELECT
NATION, THE_YEAR, SUM(AMOUNT) AS SUM_PROFIT
FROM (SELECT
N_NAME AS NATION, EXTRACT(YEAR FROM O_ORDERDATE) AS THE_YEAR,
L_EXTENDEDPRICE*(1-L_DISCOUNT)-PS_SUPPLYCOST*L_QUANTITY AS AMOUNT
FROM PART, SUPPLIER, LINEITEM, PARTSUPP, ORDERS, NATION
WHERE S_SUPPKEY = L_SUPPKEY
AND PS_SUPPKEY = L_SUPPKEY
AND PS_PARTKEY = L_PARTKEY
AND P_PARTKEY = L_PARTKEY
AND O_ORDERKEY = L_ORDERKEY
AND S_NATIONKEY = N_NATIONKEY
AND P_NAME LIKE '%green%'
) AS PROFIT
GROUP BY NATION, THE_YEAR
ORDER BY NATION, THE_YEAR DESC
Return to statistics
Query 10
SELECT
C_CUSTKEY, C_NAME, SUM(L_EXTENDEDPRICE*(1-L_DISCOUNT)) AS REVENUE,
C_ACCTBAL, N_NAME, C_ADDRESS, C_PHONE, C_COMMENT
FROM CUSTOMER, ORDERS, LINEITEM, NATION
WHERE C_CUSTKEY = O_CUSTKEY
AND L_ORDERKEY = O_ORDERKEY
AND O_ORDERDATE >= DATE '1993-10-01'
AND O_ORDERDATE < DATE '1994-01-01'
AND L_RETURNFLAG = 'R'
AND C_NATIONKEY = N_NATIONKEY
GROUP BY C_CUSTKEY, C_NAME, C_ACCTBAL, C_PHONE, N_NAME, C_ADDRESS, C_COMMENT
ORDER BY REVENUE DESC
Return to statistics
Query 11
SELECT
PS_PARTKEY, SUM(PS_SUPPLYCOST*PS_AVAILQTY) AS THE_VALUE
FROM PARTSUPP, SUPPLIER, NATION
WHERE PS_SUPPKEY = S_SUPPKEY
AND S_NATIONKEY = N_NATIONKEY
AND N_NAME = 'GERMANY'
GROUP BY PS_PARTKEY
HAVING SUM(PS_SUPPLYCOST*PS_AVAILQTY) >
(SELECT
SUM(PS_SUPPLYCOST*PS_AVAILQTY) * 0.001
FROM PARTSUPP, SUPPLIER, NATION
WHERE PS_SUPPKEY = S_SUPPKEY
AND S_NATIONKEY = N_NATIONKEY
AND N_NAME = 'GERMANY'
)
ORDER BY THE_VALUE DESC
Return to statistics
Query 12
SELECT
L_SHIPMODE, SUM(CASE WHEN O_ORDERPRIORITY = '1-URGENT'
OR O_ORDERPRIORITY = '2-HIGH'
THEN 1
ELSE 0
END) AS HIGH_LINE_COUNT,
SUM(CASE WHEN O_ORDERPRIORITY <> '1-URGENT'
AND O_ORDERPRIORITY <> '2-HIGH'
THEN 1
ELSE 0
END) AS LOW_LINE_COUNT
FROM ORDERS, LINEITEM
WHERE O_ORDERKEY = L_ORDERKEY
AND L_SHIPMODE IN ('MAIL', 'SHIP')
AND L_COMMITDATE < L_RECEIPTDATE
AND L_SHIPDATE < L_COMMITDATE
AND L_RECEIPTDATE >= DATE '1994-01-01'
AND L_RECEIPTDATE < DATE '1995-01-01'
GROUP BY L_SHIPMODE
ORDER BY L_SHIPMODE
Return to statistics
Query 13
SELECT
COUNT(*), THE_YEAR, SUM(REVENUE) AS REVENUE
FROM (SELECT
EXTRACT(YEAR FROM O_ORDERDATE) AS THE_YEAR,
L_EXTENDEDPRICE * (1-L_DISCOUNT) AS REVENUE
FROM LINEITEM, ORDERS
WHERE O_ORDERKEY = L_ORDERKEY
AND O_CLERK = 'Clerk#000000088'
AND L_RETURNFLAG = 'R'
) AS PERFORMANCE
GROUP BY THE_YEAR
ORDER BY THE_YEAR
Return to statistics
Query 14
SELECT
100.00 * SUM(CASE WHEN P_TYPE LIKE 'PROMO%'
THEN L_EXTENDEDPRICE*(1-L_DISCOUNT)
ELSE 0
END) / SUM(L_EXTENDEDPRICE*(1-L_DISCOUNT)) AS PROMO_REVENUE
FROM LINEITEM, PART
WHERE L_PARTKEY = P_PARTKEY
AND L_SHIPDATE >= DATE '1995-09-01'
AND L_SHIPDATE < DATE '1995-10-01'
Return to statistics
Query 15
CREATE VIEW REVENUE (SUPPLIER_NO, TOTAL_REVENUE) AS
SELECT L_SUPPKEY, SUM(L_EXTENDEDPRICE*(1-L_DISCOUNT)) AS FOO
FROM LINEITEM
WHERE L_SHIPDATE >= DATE '1996-01-01'
AND L_SHIPDATE < DATE '1996-04-01'
GROUP BY L_SUPPKEY;
SELECT
S_SUPPKEY, S_NAME, S_ADDRESS, S_PHONE, TOTAL_REVENUE
FROM SUPPLIER, REVENUE
WHERE S_SUPPKEY = SUPPLIER_NO
AND TOTAL_REVENUE = (SELECT MAX(TOTAL_REVENUE)
FROM REVENUE)
ORDER BY S_SUPPKEY;
DROP VIEW REVENUE;
Return to statistics
Query 16
SELECT
P_BRAND, P_TYPE, P_SIZE, COUNT(DISTINCT PS_SUPPKEY) AS SUPPLIER_CNT
FROM PARTSUPP, PART
WHERE P_PARTKEY = PS_PARTKEY
AND P_BRAND <> 'Brand#45'
AND P_TYPE NOT LIKE 'MEDIUM POLISHED%'
AND P_SIZE IN (49, 14, 23, 45, 19, 3, 36, 9)
AND PS_SUPPKEY NOT IN (SELECT
S_SUPPKEY
FROM SUPPLIER
WHERE S_COMMENT LIKE '%Better Business Bureau%Complaints%')
GROUP BY P_BRAND, P_TYPE, P_SIZE
ORDER BY SUPPLIER_CNT DESC, P_BRAND, P_TYPE, P_SIZE;
Return to statistics
Query 17
SELECT
SUM(L_EXTENDEDPRICE)/7.0 AS AVG_YEARLY
FROM LINEITEM, PART
WHERE P_PARTKEY = L_PARTKEY
AND P_BRAND = 'Brand#23'
AND P_CONTAINER = 'MED BOX'
AND L_QUANTITY < (SELECT
0.2 * AVG(L1.L_QUANTITY)
FROM LINEITEM L1
WHERE L1.L_PARTKEY = P_PARTKEY);
|