Execute this SQL

-- listing files
FROM glob('dataset/*');

-- reading from files
FROM 'dataset/*.parquet' LIMIT 100; 

-- reading parquet files metadata (min_value, max_value, null_count for each field in each file)
FROM parquet_metadata('dataset/*.parquet');

-- convert files or export tables to parquet
COPY (FROM tbl) TO 'file.parquet' (FORMAT 'parquet');
COPY 'data.csv' TO 'data.paruqet' (FORMAT 'parquet');

Copy code

Octavian Zarzu

Copy code

Expand

Share link

Execute this SQL

-- This selects only specific fields from the CSV
-- It also renames one of the fields in the target schema
COPY (SELECT col1, col2, col3 AS new_col_name
	    FROM read_csv('~/data/source.csv',AUTO_DETECT=TRUE))
  TO '~/data/target.parquet' (FORMAT 'PARQUET', CODEC 'ZSTD');

Copy code

Robin Moffatt

Copy code

Expand

Share link

Execute this SQL

-- Here we make use of list_transform and 
-- the fact that DuckDB knows lambda expressions
SELECT list_transform(split('01:08:22', ':'), x -> CAST (x AS INTEGER));

Copy code

Michael Simons

Copy code

Expand

Share link

Execute this SQL

with bits as (
    select
    -- add 8 bits to the end to account for the delimiter
    bit_length(columns(*))  + 8
    from <TABLE>
),
-- aggregate all columns
bits_agg as (
    select sum(columns(*)) from bits
),
-- unpivot a wide single row of aggs to single column
bits_col as (
    unpivot bits_agg on columns(*)
)
-- add them all up & convert to mb
select sum(value) / (8 * 1024 ** 2) as mb
from bits_col

Copy code

Hamilton Ulmer

Copy code

Expand

Share link


Convert EPOC integer field to TimestampSQL

Editor's note: date/time conversion can be painful in SQL and in many programming languages. Simon shows you how to handle it in DuckDB here, with timezone values. There are other timestamp without timezone functions in the DuckDB docs.

Execute this SQL

select ('EPOCH'::TIMESTAMP + INTERVAL (epoc_field::INT) seconds)::TIMESTAMPTZ  as epoc_tz
from
(
    -- epoc column - number of seconds since 1 Jan 1970
    select 1682481422 as epoc_field
)
;

Copy code

Simon Aubury

Expand

Share link

Execute this SQL

--Below SQL to generate MD5 of a string

select md5('test')

Copy code

Expand

Share link


StackOverflow Analytics

Editor's note: Michael shares Stackoverflow data in MotherDuck as part of this snippet as well as typical aggregate analytics on the data. There are additional sample data sets attached by default in MotherDuck as the 'sample_data' share.

Attach and select MotherDuck database

Data shared/available on MotherDuck

ATTACH 'md:_share/stackoverflow/6c318917-6888-425a-bea1-5860c29947e5' AS stackoverflow;
USE stackoverflow;

Copy code

Which 5 questions have the most comments, what is the post title and comment countSQL

SELECT Title, CommentCount
FROM posts
WHERE PostTypeId = 1
ORDER BY CommentCount DESC
LIMIT 5;

Copy code

User reputation and reputation rate per daySQL

SELECT name, reputation, 
       round(reputation/day(today()-createdAt)) as rate, 
       day(today()-createdAt) as days, 
       createdAt
FROM users WHERE reputation > 1000000 ORDER BY rate DESC;

Copy code

Reputation rate as bar chart with CTESQL

WITH top_users as (
  SELECT name, reputation, 
       round(reputation/day(today()-createdAt)) as rate, day(today()-createdAt) as days, 
       createdAt
       FROM users WHERE reputation > 1000000
)
SELECT name, reputation, rate, bar(rate,150,300,35) AS bar FROM top_users;

Copy code

Post statistics per yearSQL

SELECT  year(CreationDate) AS year, count(*), 
        round(avg(ViewCount)), max(AnswerCount)
FROM posts 
GROUP BY year 
ORDER BY year DESC LIMIT 10;

Copy code

Posting Frequency with bar chart on Weekdays for "sql" tagSQL

SELECT count(*) as freq, dayname(CreationDate) AS day, 
       bar(freq, 0, 150000,20) AS plot
FROM posts WHERE posttypeid = 1 
AND tags LIKE '%<sql>%'
GROUP BY all 
ORDER BY freq DESC;

Copy code

Posting Frequency with bar chart on Weekdays for "rust" tagSQL

SELECT count(*) as freq, dayname(CreationDate) AS day, 
       bar(freq, 0, 10000,20) AS plot
FROM posts WHERE posttypeid = 1 
AND tags LIKE '%<rust>%'
GROUP BY all 
ORDER BY freq DESC;

Copy code

Show less
Michael Hunger

Expand

Share link