Postgres full-text search Spring boot integration

Kostiantyn Ivanov
6 min readAug 29, 2023

--

Motivation

We have some UI with search functionality…

The search flow looks like this:

The issue is, that the “rating, user name” data are related to a user entity, meanwhile “create date, number of views, title, body” are related to a story entity. And we need to support pretty frequent modification of the users’ rating as well as the next search capabilities:

  • Range search by user rating
  • Range search by story creation date
  • Range search but stories’ number of views
  • Full-text search by story titles
  • Full-text search by story bodies

Setup

  • Spring boot application (Spring web, Spring jdbc, Postges driver)
  • Postgres database

UI is out of scope and we just will provide HTTP endpoints.

Database

Schema:

--Create Users table
CREATE TABLE IF NOT EXISTS users
(
id bigserial NOT NULL,
name character varying(100) NOT NULL,
rating integer,
PRIMARY KEY (id)
)
;
CREATE INDEX usr_rating_idx
ON users USING btree
(rating ASC NULLS LAST)
TABLESPACE pg_default
;

--Create Stories table
CREATE TABLE IF NOT EXISTS stories
(
id bigserial NOT NULL,
create_date timestamp without time zone NOT NULL,
num_views bigint NOT NULL,
title text NOT NULL,
body text NOT NULL,
fulltext tsvector,
user_id bigint,
PRIMARY KEY (id),
CONSTRAINT user_id_fk FOREIGN KEY (user_id)
REFERENCES users (id) MATCH SIMPLE
ON UPDATE NO ACTION
ON DELETE NO ACTION
NOT VALID
)
;
CREATE INDEX str_bt_idx
ON stories USING btree
(create_date ASC NULLS LAST,
num_views ASC NULLS LAST, user_id ASC NULLS LAST)
;

CREATE INDEX fulltext_search_idx
ON stories USING gin
(fulltext)
;

We created two tables (users with btree index to search by rating and stories with btree index for number of views, created time and user id + gin index for full-text search column. We used tsvector type for full-text search data and will support its modification on the app level.

--Create Users table
--id, name, rating

--Create stories table
--id, user_id, create_time, num_views, title, body

select * from taskentity
where to_tsvector(description) @@ plainto_tsquery('two five')
;

insert into taskentity (id, description) values (105, 'one two three four five')
;

----------------------------------

create index fulltext_index
on story
using GIN (fulltext_index)
;

insert into story (data, fulltext_index)
values (
'{"title": "first story", "body": "my first story..."}',
to_tsvector('first' || ' ' || 'story')
)
;

select * from story
where fulltext_index @@ plainto_tsquery('my first story')
;
/src/main/resources/schema.sql

Application

Dependencies

plugins {
id 'java'
id 'org.springframework.boot' version '3.1.3'
id 'io.spring.dependency-management' version '1.1.3'
}

group = 'com.example'
version = '0.0.1-SNAPSHOT'

java {
sourceCompatibility = '17'
}

repositories {
mavenCentral()
}

dependencies {
implementation 'org.springframework.boot:spring-boot-starter-data-jdbc'
implementation 'org.springframework.boot:spring-boot-starter-web'
runtimeOnly 'org.postgresql:postgresql'
testImplementation 'org.springframework.boot:spring-boot-starter-test'
}

tasks.named('test') {
useJUnitPlatform()
}

Schema.sql (resources root)

Take it from a database section if you need an automatical schema setup.

Configuration

spring:
datasource:
url: jdbc:postgresql://localhost:5432/postgres
username: postgres
password: postgres

Model

public record Period(String fieldName, LocalDateTime min, LocalDateTime max) {
}
public record Range(String fieldName, long min, long max) {
}
public record Search(List<Period> periods, List<Range> ranges, String fullText, long offset, long limit) {
}
public record UserStory(Long id, LocalDateTime createDate, Long numberOfViews,
String title, String body, Long userRating, String userName, Long userId) {
}

Repository

@Repository
public class UserStoryRepository {

private final JdbcTemplate jdbcTemplate;


@Autowired
public UserStoryRepository(JdbcTemplate jdbcTemplate) {
this.jdbcTemplate = jdbcTemplate;
}

public List<UserStory> findByFilters(Search search) {
return jdbcTemplate.query(
"""
SELECT
s.id id,
create_date,
num_views,
title,
body,
user_id,
name user_name,
rating user_rating
FROM stories s INNER JOIN users u
ON s.user_id = u.id
WHERE true
""" + buildDynamicFiltersText(search)
+ " order by create_date desc offset ? limit ?",
(rs, rowNum) -> new UserStory(
rs.getLong("id"),
rs.getTimestamp("create_date").toLocalDateTime(),
rs.getLong("num_views"),
rs.getString("title"),
rs.getString("body"),
rs.getLong("user_rating"),
rs.getString("user_name"),
rs.getLong("user_id")
),
buildDynamicFilters(search)
);
}

public void save(UserStory userStory) {
var keyHolder = new GeneratedKeyHolder();

jdbcTemplate.update(connection -> {
PreparedStatement ps = connection
.prepareStatement(
"""
INSERT INTO stories (create_date, num_views, title, body, user_id)
VALUES (?, ?, ?, ?, ?)
""",
Statement.RETURN_GENERATED_KEYS
);
ps.setTimestamp(1, Timestamp.valueOf(userStory.createDate()));
ps.setLong(2, userStory.numberOfViews());
ps.setString(3, userStory.title());
ps.setString(4, userStory.body());
ps.setLong(5, userStory.userId());

return ps;
}, keyHolder);

var generatedId = (Long) keyHolder.getKeys().get("id");

if (generatedId != null) {
updateFullTextField(generatedId);
}
}

private void updateFullTextField(Long generatedId) {
jdbcTemplate.update(
"""
UPDATE stories SET fulltext = to_tsvector(title || ' ' || body)
where id = ?
""",
generatedId
);
}

private Object[] buildDynamicFilters(Search search) {
var filtersStream = search.ranges().stream()
.flatMap(
range -> Stream.of((Object) range.min(), range.max())
);

var periodsStream = search.periods().stream()
.flatMap(
range -> Stream.of((Object) Timestamp.valueOf(range.min()), Timestamp.valueOf(range.max()))
);

filtersStream = Stream.concat(filtersStream, periodsStream);

if (!search.fullText().isBlank()) {
filtersStream = Stream.concat(filtersStream, Stream.of(search.fullText()));
}

filtersStream = Stream.concat(filtersStream, Stream.of(search.offset(), search.limit()));

return filtersStream.toArray();
}

private String buildDynamicFiltersText(Search search) {
var rangesFilterString =
Stream.concat(
search.ranges()
.stream()
.map(
range -> String.format(" and %s between ? and ? ", range.fieldName())
),
search.periods()
.stream()
.map(
range -> String.format(" and %s between ? and ? ", range.fieldName())
)
)
.collect(Collectors.joining(" "));

return rangesFilterString + buildFulltextFilterText(search.fullText());
}

private String buildFulltextFilterText(String fullText) {
return fullText.isBlank() ? "" : " and fulltext @@ plainto_tsquery(?) ";
}
}

The main ideas, implemented in the repository:
- When we save a new story — we also update the full-text field using

UPDATE stories SET fulltext = to_tsvector(title || ' ' || body)
  • When we search for stories — we build the filters dynamically. The full SQL query (with all the filters) will look like this:
SELECT
s.id id,
create_date,
num_views,
title,
body,
user_id,
name user_name,
rating user_rating
FROM stories s INNER JOIN users u
ON s.user_id = u.id
WHERE true
AND rating BETWEEN ? AND ?
AND num_views BETWEEN ? AND ?
AND create_date BETWEEN ? AND ?
AND fulltext @@ plainto_tsquery(?)
ORDER BY create_date DESC OFFSET ? LIMIT ?

Controller

@RestController
@RequestMapping("/user-stories")
public class UserStoryController {
private final UserStoryRepository userStoryRepository;

@Autowired
public UserStoryController(UserStoryRepository userStoryRepository) {
this.userStoryRepository = userStoryRepository;
}

@PostMapping
public void save(@RequestBody UserStory userStory) {
userStoryRepository.save(userStory);
}

@PostMapping("/search")
public List<UserStory> search(@RequestBody Search search) {
return userStoryRepository.findByFilters(search);
}
}

Service layers was skipped as well as mapping between DTOs and models. Also CRUD repository for user entity was not implemented since it’s out of scope of this article.

Test

Save new story

POST /user-stories (per 1000 requests): 31ms average latency.

Search for a stories

Request:
POST /user-stories/search

{
"ranges": [
{
"fieldName": "rating",
"min": 1,
"max": 5
},
{
"fieldName": "num_views",
"min": 0,
"max": 50
}
],
"periods": [
{
"fieldName": "create_date",
"min": "2023-08-28T00:00:00",
"max": "2023-08-28T23:00:00"
}
],
"fullText": "Second story",
"offset": 0,
"limit": 10

}

SQL query:

SELECT
s.id id,
create_date,
num_views,
title,
body,
user_id,
name user_name,
rating user_rating
FROM stories s INNER JOIN users u
ON s.user_id = u.id
WHERE true
AND rating BETWEEN 0 AND 100
AND num_views BETWEEN 0 AND 100
AND create_date BETWEEN '2023-08-28' AND '2023-08-29'
AND fulltext @@ plainto_tsquery('Second story')
ORDER BY create_date DESC OFFSET 0 LIMIT 100
;

Query plan:

Summary

Postgres can be a good option to choose when we have a predefined filters schema. The full-text search using tsvectors works fine and we avoid the full table scans. The drawback is that we need to support our full-text field manually (potentially we can create a triggered function on DB that will simplify our application code will exclude extra DB call).

Links

Git repo with a sorce code: https://github.com/sIvanovKonstantyn/postgres-fulltext-search

--

--