Deep into Postgres



Deep into Postgres

0 0


srug-presentation-postgres


On Github krzyzak / srug-presentation-postgres

Deep into Postgres

SRUG, Gliwice, 11.04.2014

Michał Krzyżanowski

@krzyzak

Łukasz Strzebińczyk

@KillaPL

Use the right tool for the right job

MySQL

MongoDB

PostgreSQL

HStore

HStore

CREATE EXTENSION "hstore";
ALTER TABLE users ADD COLUMN settings hstore;
INSERT INTO "users" values(1, 'admin=>"true",likes_count=>"9"');
SELECT * FROM "users" WHERE settings->'admin' = 'true';
add_column :users, :settings, :hstore
User.where("settings -> 'admin' = 'true'")

HStore – limitations

  • Can’t nest objects
  • Stores everything as text

JSON

JSON

We can nest things!
ALTER TABLE users ADD COLUMN json_settings json;
UPDATE "users" SET json_settings =
 '{
   "likes_count": 9,
   "roles": ["moderator", "super-moderator"],
   "we": {"can": {"nest": "things"} }
  }
 ';
SELECT * FROM "users" WHERE
 json_extract_path(json_settings, 'we', 'can', 'nest') = 'things';

JSON objects

UPDATE "users" SET json_settings =
 '{
   "likes_count": 9,
   "roles": ["moderator", "super-moderator"],
   "we": {"can": {"nest": "things"} }
  }
 ';
SELECT * FROM "users" WHERE
 '"moderator"' IN(
  SELECT value::text FROM json_array_elements(json_settings->'roles')
 );

JSON – indexing

SELECT COUNT(*) FROM lodgings;
count
--------
 868922
EXPLAIN ANALYZE SELECT raw_data::json->'name' FROM lodgings
 WHERE raw_data::json->>'name' = 'Pousada do Pico';
                                                       QUERY PLAN
------------------------------------------------------------------------------------------------------------------------
 Seq Scan on lodgings  (cost=0.00..49279.68 rows=1037 width=467) (actual time=0.509..19126.720 rows=1 loops=1)
   Filter: (((raw_data)::json ->> 'name'::text) = 'Pousada do Pico'::text)
   Rows Removed by Filter: 209338
 Total runtime: 19126.747 ms
(4 rows)
CREATE INDEX ON lodgings USING btree ( (raw_data::json->>'name') );
EXPLAIN ANALYZE SELECT raw_data::json->'name' FROM lodgings
 WHERE raw_data::json->>'name' = 'Pousada do Pico';
                                                                   QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------
 Index Scan using lodgings_expr_idx on lodgings  (cost=0.43..8.45 rows=1 width=458) (actual time=0.295..0.297 rows=1 loops=1)
   Index Cond: (((raw_data)::json ->> 'name'::text) = 'Pousada do Pico'::text)
 Total runtime: 0.327 ms

#to_json

  • JBuilder

  • Rabl

  • ActiveModel::Serializers

  • PostgreSQL

Postgresql 9.2

array_to_json(anyarray [, pretty_bool])
row_to_json(record [, pretty_bool])

Postgresql 9.3

to_json(anyelement)
json_agg(record)
json_array_length(json)
json_each(json)
json_each_text(from_json json)
json_extract_path(from_json json, VARIADIC path_elems text[])
json_extract_path_text(from_json json, VARIADIC path_elems text[])
json_object_keys(json)
json_populate_record(base anyelement, from_json json, [, use_json_as_text bool=false])
json_populate_recordset(base anyelement, from_json json, [, use_json_as_text bool=false])
json_array_elements(json)
SELECT Array_to_json(COALESCE(Array_agg(Row_to_json(t)), '{}'))
FROM   (SELECT id,
               title,
               content,
               (SELECT Row_to_json(t)
                FROM   (SELECT id
                        FROM   "authors"
                        WHERE  ( "id" = "author_id" )) t) author
        FROM   "posts") t
gem "surus"
vs
gem "active_model_serializers"
ActiveModel::Serializer
class AuthorSerializer < ActiveModel::Serializer
  attributes :id, :name
end

class PostSerializer < ActiveModel::Serializer
  attributes :id, :title, :content
  has_one :author
end

ActiveModel::ArraySerializer.new(
  Post.all,
  each_serializer: PostSerializer
  ).to_json
Surus
Post.all_json(
  columns: [:id, :title, :content],
  include: {
    author: {
      columns: [:id, :name]
    }
  }
  )
def render_json(jobs)
  json = jobs.all_json(
    columns: [
      :id, :system_id,
      :stage, :label,
      :description, :first_reference,
      :second_reference, :cached_plate_names,
      :priority, :notifications_bitmap,
      :updated_at, :hidden
      ],
    include: {
      end_customer: { columns: [:name] },
      client: { columns: [:id, :short_name] },
      print_supplier: { columns: [:short_name] }
      })
  render json: json
end

Full text search

Full text search

  • Solr

  • Elastic Search

  • PostgreSQL

Postgresql 8.3

tsvector
tsquery

tsvector

select to_tsvector('Lorem ipsum dolor sit amet');
=> 'amet':5 'dolor':3 'ipsum':2 'lorem':1 'sit':4

tsquery

select to_tsquery('Lorem') || to_tsquery('ipsum');
=> 'lorem' | 'ipsum'

But does it have features?

create_table "posts", force: true do |t|
  t.text "content"
end
Basic search:
SELECT * FROM posts WHERE to_tsvector(content) @@
  to_tsquery('simple') && to_tsquery('query');
Any word:
SELECT * FROM posts WHERE to_tsvector(content) @@
  to_tsquery('simple') || to_tsquery('query');
Prefix:
SELECT * FROM posts WHERE to_tsvector(content) @@
  to_tsquery('simple' || ':*') && to_tsquery('query' || ':*');

Trigram

CREATE EXTENSION "pg_trgm";
select show_trgm('Lorem Ipsum');
=> {"  i","  l"," ip"," lo","em ",ips,lor,ore,psu,rem,sum,"um "}
select similarity('java', 'javascript');
=> 0.333333
select similarity('ruby', 'php');
=> 0
SELECT * FROM posts WHERE content % 'simple query';

Weighting

  • Scoping
  • Disjunctions
  • Conjunctions
  • Restrictions
  • Pagination
  • Ordering
  • Grouping
  • Joins
  • WHERE
  • LIMIT
  • OFFSET
  • ORDER
  • GROUP BY
  • JOIN

Faceted search

  • http://tech.pro/tutorial/1142/building-faceted-search-with-postgresql
  • http://www.garysieling.com/blog/gin-vs-gist-for-faceted-search-with-postgres-full-text-indexes
  • http://coffeecode.net/archives/270-A-Flask-of-full-text-search-in-PostgreSQL.html

But does it scale?

gem 'pg_search'
gem 'sunspot_rails'
gem 'sunspot_solr'
create_table "posts", force: true do |t|
  t.string   "title"
  t.text     "content"
end
pg_search:
class Post < ActiveRecord::Base
  include PgSearch
  pg_search_scope :search_with_postgres,
    against: [:title, :content], using: :tsearch
end
sunspot:
class Post < ActiveRecord::Base
  searchable do
    text :title, :content
  end
end
Post.create do |post|
  post.title   = Faker::Lorem.sentence
  post.content = Faker::Lorem.sentence(30)
end
pg_search:
Post.search_with_postgres(Faker::Lorem.sentence(3)).limit(30).to_a
sunspot:
Post.search do
  fulltext Faker::Lorem.sentence(3)
end.results
Post.create do |post|
  post.title   = Faker::Lorem.sentence
  post.content = Faker::Lorem.sentence(5)
end
pg_search:
Post.search_with_postgres(Faker::Lorem.sentence(1)).limit(30).to_a
sunspot:
Post.search do
  fulltext Faker::Lorem.sentence(1)
end.results
Post.create do |post|
  post.title   = ""
  post.content = Faker::Name.name
end
pg_search:
Post.search_with_postgres(Faker::Name.first_name).limit(30).to_a
sunspot:
Post.search do
  fulltext Faker::Name.first_name
end.results
pg_search_scope :search,
  against: [:id, :system_id, :description, :first_reference, :second_reference],
  associated_against: {
    print_supplier: [:name],
    end_customer: [:name],
    client: [:name, :short_name]
  }
  using: {
    tsearch: { prefix: true }
  }

UUID as Primary key

CREATE EXTENSION "uuid-ossp";
SELECT uuid_generate_v4();
#=> cf4fadf7-22e0-4f46-bdda-232b7883a1c9
create_table('my_table_name', id: :uuid) do |t|
  t.string 'title'
end
User.create.attributes
# => {"id"=>"468cdfa4-1c11-4e34-a996-ecc561c0e866", (...) }

PostGIS

Spatial and Geographic Objects for PostgreSQL

PostGIS – gentle introduction

PostGIS – gentle introduction

PostGIS – gentle introduction

SELECT ST_Distance(
  ST_GeometryFromText('Point(-118.4079 33.9434)'),  -- Los Angeles
  ST_GeometryFromText('Point(2.5711 49.0039)'))     -- Paris
    AS geometry_distance,
ST_Distance(
  ST_GeographyFromText('Point(-118.4079 33.9434)'), -- Los Angeles
  ST_GeographyFromText('Point(2.5711 49.0039)'))    -- Paris
    AS geography_distance;

 geometry_distance | geography_distance
-------------------+--------------------
  121.912825827515 |   9125804.29716346

Postgis – select POI within 2km

Create column
ALTER TABLE pois ADD COLUMN location geography(Point,4326);
Add POI
INSERT INTO pois(name, location)
 VALUES(
  "Sample",
  ST_MakePoint(52.228, 21.011), 4326)
 );
Select POIs within 2km of desired location
SELECT * FROM pois WHERE
	 ST_DWithin(
	  location,
	  ST_MakePoint(52.218, 21.019), # desired point (Warsaw)
	  2000 # Meters
	 )

Postgis – shapes example

Postgis – shapes example

ALTER TABLE districts ADD COLUMN shape geography(MultiPolygon,4326);
INSERT INTO districts(name, shape)
 VALUES(
  "Silesian",
  ST_PolygonFromText('POLYGON((-71.1776585052917 42.3902909739571,-71.1776820268866 42.3903701743239,
-71.1776063012595 42.3903825660754,-71.1775826583081 42.3903033653531,-71.1776585052917 42.3902909739571))')
 );
SELECT name FROM districts WHERE
 ST_CONTAINS(
   shape,
   ST_MakePoint(52.218, 21.019)
 )

Postgis – indexing

SELECT COUNT(*) FROM places;
  count
---------
 8738610
EXPLAIN ANALYZE SELECT pl.full_name
 FROM places pl
 WHERE
  ST_DWithin(
   pl.location,
   ST_GeomFromEWKT('SRID=4326;POINT(3.382491 50.609511)'), 1.0899
  );
                                                                                                                    QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Bitmap Heap Scan on places pl  (cost=577.43..47434.52 rows=789 width=25) (actual time=6.446..37.533 rows=16613 loops=1)
   Recheck Cond: (location && '0103000020E61000000100000005000000DCD440F33957024028D2FD9C82C24840DCD440F33957024038BBB54C86D94940EEB25F77BAE3114038BBB54C86D94940EEB25F77BAE3114028D2FD9C82C24840DCD440F33957024028D2FD9C82C24840'::geometry)
   Filter: (('0101000020E61000005C1D0071570F0B40B0C6D974044E4940'::geometry && st_expand(location, 1.0899::double precision)) AND _st_dwithin(location, '0101000020E61000005C1D0071570F0B40B0C6D974044E4940'::geometry, 1.0899::double precision))
   Rows Removed by Filter: 2582
   ->  Bitmap Index Scan on places_location_idx  (cost=0.00..577.23 rows=11842 width=0) (actual time=5.869..5.869 rows=19195 loops=1)
         Index Cond: (location && '0103000020E61000000100000005000000DCD440F33957024028D2FD9C82C24840DCD440F33957024038BBB54C86D94940EEB25F77BAE3114038BBB54C86D94940EEB25F77BAE3114028D2FD9C82C24840DCD440F33957024028D2FD9C82C24840'::geometry)
 Total runtime: 38.362 ms

But there’s more!

  • ST_Area – Returns the area of the surface if it is a polygon or multi-polygon.
  • ST_Centroid — Returns the geometric center of a geometry.
  • ST_Touches — Returns TRUE if the geometries have at least one point in common, but their interiors do not intersect.
  • ST_ShortestLine — Returns the 2-dimensional shortest line between two geometries
  • And many, many more…

Stored procedures...

Stored procedures...

CREATE OR REPLACE FUNCTION update_reason_for_document() RETURNS trigger as $$
      DECLARE
        change_reason varchar := '';
      BEGIN
        IF (OLD.name IS DISTINCT FROM NEW.name) THEN
          change_reason := change_reason || 'NAME;';
        END IF;

        IF (OLD.document_type IS DISTINCT FROM NEW.document_type) THEN
          change_reason := change_reason || 'DOCUMENT TYPE;';
        END IF;

        INSERT INTO documents_historical_reasons(document_id, change_reason)
          VALUES (NEW.document_id, change_reason);

        RETURN NEW;
      END;

      $$ LANGUAGE plpgsql;
      CREATE TRIGGER update_history
      BEFORE UPDATE OF document_type, name ON documents
      FOR EACH ROW
      WHEN (OLD.document_type IS DISTINCT FROM NEW.document_type
            OR OLD.name IS DISTINCT FROM NEW.name)
      EXECUTE PROCEDURE update_reason_for_document();

But...

Stored procedures – JavaScript

CREATE TABLE ratings (id int, count int, total int, average float);

CREATE FUNCTION add_rating(id int, rating int)
 RETURNS float AS
$$
 var data = plv8.execute("SELECT count, total, average FROM ratings
                          WHERE id = $1", [id])[0];
 var newTotal = data.total + rating;
 var newCount = data.count + 1;
 var newAverage = (newTotal / newCount);
 plv8.execute("UPDATE ratings SET count = $1, total = $2, average = $3
               WHERE id = $4",
 [newCount, newTotal, newAverage, id]);
 return newAverage;
$$
LANGUAGE plv8;

Stored procedures – JavaScript

INSERT INTO ratings VALUES (1, 0, 0, 0);
SELECT add_rating(1, 5);
 add_rating
------------
 5
SELECT add_rating(1, 4);
 add_rating
------------
 4.5

Thanks!

Questions?

Useful links: