Sort results by weekday name (SQL)
-- DuckDB has only the weekday name, not the number;
-- create an enum in the right order
create type Weekday as enum
  ('Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday');

-- cast the string to the enum for sorting
select count(*), cast(dayname(CreationDate) as Weekday) as day
from posts
where posttypeid = 1 and tags like '%>sql<%'
group by all
order by day asc;
-- doing the cast inside ORDER BY causes a bug

-- ┌──────────────┬───────────┐
-- │ count_star() │    day    │
-- │    int64     │  weekday  │
-- ├──────────────┼───────────┤
-- │       103937 │ Monday    │
-- │       115575 │ Tuesday   │
-- │       119825 │ Wednesday │
-- │       119514 │ Thursday  │
-- │       103445 │ Friday    │
-- │        47139 │ Saturday  │
-- │        47390 │ Sunday    │
-- └──────────────┴───────────┘
— Michael Hunger
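If your DuckDB version also has isodow() (ISO day of the week, Monday = 1 through Sunday = 7), a sketch that sorts on the number and skips the enum entirely, assuming the same posts table:

-- assumes isodow() is available in your DuckDB build
select count(*), dayname(CreationDate) as day
from posts
where posttypeid = 1 and tags like '%>sql<%'
group by day, isodow(CreationDate)
order by isodow(CreationDate);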
Round to the nearest million (SQL)
-- a negative precision rounds to the left of the decimal point
select round(2061899, -6); -- 2000000.0
— Michael Hunger
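A couple more values show how the negative precision walks left across the digits:

select round(2061899, -3); -- 2062000.0 (nearest thousand)
select round(2061899, -1); -- 2061900.0 (nearest ten)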
Query S3-compatible object storage (SQL)
-- Using Scaleway as an example, where the first 75GB is free
-- https://www.scaleway.com/en/object-storage/
-- This also works for Google Cloud Storage, DigitalOcean Spaces,
-- Wasabi and so on

-- set these like you do for AWS
SET s3_access_key_id='MY_KEY';
SET s3_secret_access_key='MY_SECRET';

-- set the endpoint and region to override the default of
-- using AWS endpoints
SET s3_endpoint='s3.nl-ams.scw.cloud';
SET s3_region='nl-ams';

SELECT
  -- format a large number with helpful commas for readability
  format('{:,}', count(*)) as "total rows in file"
FROM 's3://<BUCKET_NAME>/<FILE_NAME>.zst.parquet';
— Chris Adams
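Newer DuckDB releases also let you bundle these settings into a named secret; a minimal sketch of the same Scaleway setup, assuming a DuckDB version with the secrets manager (0.10 or later) and the httpfs extension loaded:

CREATE SECRET scaleway (
    TYPE S3,
    KEY_ID 'MY_KEY',
    SECRET 'MY_SECRET',
    ENDPOINT 's3.nl-ams.scw.cloud',
    REGION 'nl-ams'
);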
Measure the distance of a GPX track (SQL)
LOAD spatial;
-- Pick for example any Garmin Connect or Strava export as a GPX file
-- https://en.wikipedia.org/wiki/GPS_Exchange_Format
SELECT round(
  -- Compute the length of the geometry in the units of the
  -- reference system being used
  ST_Length(
    -- Transform from standard GPS WGS-84 into a reference system
    -- that uses meters. EPSG:25832 stands for ETRS89 / UTM zone 32N,
    -- Europe between 6°E and 12°E
    ST_Transform(
      -- Fix the order of the coordinates
      -- (GPX is read as long/lat, we need lat/long)
      ST_FlipCoordinates(
        -- Turn the WKB into a DuckDB geometry
        ST_GeomFromWKB(wkb_geometry)
      ),
      'EPSG:4326', 'EPSG:25832'
    )
  ) / 1000, 2) AS "Distance (km)"
FROM st_read('activity_11501782487.gpx', layer='tracks');
— Michael Simons
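EPSG:25832 only fits tracks recorded in central Europe; for other longitudes you would swap in the matching UTM zone. A hedged sketch for a track on the US East Coast (WGS 84 / UTM zone 18N, EPSG:32618; the file name is hypothetical):

LOAD spatial;
SELECT round(
  ST_Length(
    ST_Transform(
      ST_FlipCoordinates(ST_GeomFromWKB(wkb_geometry)),
      -- UTM zone 18N covers roughly 78°W to 72°W
      'EPSG:4326', 'EPSG:32618'
    )
  ) / 1000, 2) AS "Distance (km)"
FROM st_read('some_east_coast_activity.gpx', layer='tracks');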
Compute ascent and descent of a GPX track (SQL)
-- This requires the spatial extension
LOAD spatial;
-- Pick for example any Garmin Connect or Strava export as a GPX file
-- https://en.wikipedia.org/wiki/GPS_Exchange_Format
WITH elevation_gains AS (
  SELECT ele - lag(ele) OVER (ORDER BY track_seg_point_id ASC) AS value
  FROM st_read('activity_11501782487.gpx', layer='track_points')
)
SELECT
  sum(e.value) FILTER (WHERE e.value >= 0) AS ascent,
  sum(e.value) FILTER (WHERE e.value < 0) AS descent
FROM elevation_gains e;
— Michael Simons
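As a quick sanity check, ascent plus descent should equal the net change between the first and last track point. A sketch against the same file, using arg_min/arg_max to pick the elevations at the lowest and highest sequence ids:

LOAD spatial;
SELECT arg_max(ele, track_seg_point_id) - arg_min(ele, track_seg_point_id) AS net_change
FROM st_read('activity_11501782487.gpx', layer='track_points');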
No more error on end-of-line commas (SQL)
Editor's note: this query demonstrates DuckDB's default behavior, but boy does it make it easier to comment out your SQL lines without breaking the query.
-- 🤓 Courtesy of Michael Simons (aka. @rotnroll666)
-- 🐦 https://twitter.com/rotnroll666/status/1671066790368010241
SELECT
  foo,
  bar,
  -- hello,
  -- world,
  -- dummy,
FROM bazbar;
— SALES
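If memory serves, DuckDB extends the same trailing-comma tolerance to list literals, though treat this as an assumption to verify on your version:

-- assumed: trailing commas inside list literals are also accepted
SELECT ['hello', 'world',] AS greetings;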
Parse AWS CloudTrail logs from S3 (SQL)
-- Background: CloudTrail logs contain a significant amount of data about the
-- different actions that occur in your AWS account, but they can be challenging
-- to deal with for a couple of reasons:
-- 1. Each file is a single JSON object with a list of records in a "Records" key
-- 2. The schema varies wildly depending on the AWS services and actions being used
--
-- CloudTrail logs typically go to:
-- s3://<bucket>/AWSLogs/<accountid>/CloudTrail/<region>/<year>/<month>/<day>/<accountid-region-some-unique-id>.json.gz
--
-- To use this query, you need to load the httpfs extension and either set the
-- following s3_ variables or ensure your standard AWS secret key environment
-- variables are set:
-- INSTALL httpfs;
-- LOAD httpfs;
-- SET s3_region='us-west-2';
-- SET s3_access_key_id='';
-- SET s3_secret_access_key='';

-- The following query returns the first 10 logs from today.
WITH parsed_logs AS (
  SELECT unnest(Records, recursive := true)
  FROM read_json_auto(
    's3://<bucket>/AWSLogs/<accountid>/CloudTrail/<region>/'
      || strftime(now(), '%Y/%m/%d') || '/*.json.gz',
    union_by_name=true
  )
)
SELECT * FROM parsed_logs LIMIT 10;
— Damon
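Once the records are unnested, standard CloudTrail fields such as eventName and eventSource become plain columns. A hedged follow-up that counts today's events by name, assuming the same bucket layout:

WITH parsed_logs AS (
  SELECT unnest(Records, recursive := true)
  FROM read_json_auto(
    's3://<bucket>/AWSLogs/<accountid>/CloudTrail/<region>/'
      || strftime(now(), '%Y/%m/%d') || '/*.json.gz',
    union_by_name=true
  )
)
SELECT eventName, count(*) AS events
FROM parsed_logs
GROUP BY eventName
ORDER BY events DESC
LIMIT 10;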
Create an Arrow-based scalar UDF from a lambda (Python)
# Creates a lambda that adds X+Y using pyarrow.compute
# This lambda will be called once for every 2048 rows
import duckdb
import pyarrow.compute as pc
from duckdb.typing import BIGINT

con = duckdb.connect()
con.create_function('plus_lambda', lambda x, y: pc.add(x, y),
                    [BIGINT, BIGINT], BIGINT, type='arrow')
res = con.execute('select plus_lambda(i, 5) from range(11) tbl(i)').df()
print(res)
con.remove_function('plus_lambda')