/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2020 BIFIT
 */
#pragma once

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Name of the ELF section that holds the ABI v1 entry point. */
#define ABI_V1_ENTRYPOINT "filter_v1"

/* Place a symbol into the named section and keep it even if unreferenced. */
#define SECTION(name) __attribute__((section(name), used))

/**
 * @mainpage
 *
 * This documentation describes mitigator_bpf.h, an API for user-defined programs
 * executed by BIFIT Mitigator anti-DDoS software.
 *
 * Visit https://docs.mitigator.ru for product documentation.
 */

/** @file */

/**
 * @brief Mandatory program identifier for display to the user.
 *
 * The system may reject programs without this identifier.
 *
 * The string should help the user to identify program and its version.
 * Maximum is 40 characters, including NUL terminator.
 *
 * The expansion already ends with a semicolon, so the macro is used
 * without a trailing one.
 *
 * @see FILTER_V1 for usage example.
 *
 * @hideinitializer
 */
#define PROGRAM_DISPLAY_ID(id) \
    SECTION("meta.display_id") \
    static const char _mitigator_meta_program_id[40] = id;

/**
 * @brief Entry point marker for ABI v1.
 *
 * A filter that drops all packets looks as follows:
 * @code
 * #include "mitigator_bpf.h"
 *
 * FILTER_V1 enum Result
 * filter(Context ctx) {
 *     return RESULT_DROP;
 * }
 *
 * PROGRAM_DISPLAY_ID("Example v0.1")
 * @endcode
 *
 * @hideinitializer
 */
#define FILTER_V1 SECTION(ABI_V1_ENTRYPOINT)

/**
 * @brief Force the compiler to inline a local function.
 *
 * This macro should be used on all function definitions, otherwise compiler
 * might generate code that will fail validation (which forbids backward jumps).
 *
 * Example:
 * @code
 * LOCAL int
 * sum(int a, int b) {
 *     return a + b;
 * }
 * @endcode
 *
 * @hideinitializer
 */
#define LOCAL static inline __attribute__((always_inline))

/**
 * @brief Hint the compiler to unroll a loop.
 *
 * Loops are usually translated to the following instruction sequence:
 * condition check, loop body, backwards jump to condition check.
 * eBPF validator forbids backwards jumps to prevent infinite loops.
 * Loop unrolling means loop body will be pasted a fixed number of times
 * in generated code.
 *
 * Obviously, unrolling requires knowing maximum number of iterations
 * in advance. The solution to loop "n" times is as follows:
 * @code
 * UNROLL for (i = 0; i < MAX; i++) {
 *     if (i >= n) {
 *         break;
 *     }
 *     ...
 * }
 * @endcode
 *
 * @note This macro currently only affects Clang.
 * @note The hint might be ignored especially with optimizations disabled.
 *
 * @hideinitializer
 */
#if defined(__clang__)
#define UNROLL _Pragma("unroll")
#else
#define UNROLL
#endif

/**
 * @brief Maximum length of any packet data returned by API functions.
 *
 * Before using a variable offset into packet, programs must check
 * that it does not exceed this constant, otherwise eBPF validator
 * may reject the code. To avoid valid programs being mistakenly
 * rejected, place the check immediately before using the offset.
 *
 * @see hash_crc32_data() for example usage.
 */
#define MAX_PAYLOAD_LENGTH 1536

/**
 * @brief Maximum length of program parameters.
 *
 * Before using a variable offset into parameters, programs must check
 * that it does not exceed this constant.
 *
 * @see MAX_PAYLOAD_LENGTH for comments on offset checking.
 * @see parameters_get() to get parameters.
 */
#define MAX_PARAMETERS_LENGTH 1024

#ifdef __cplusplus
namespace mitigator {
#endif

/** @brief Opaque filter context. */
typedef void* Context;

/**
 * @brief Filter verdict.
 *
 * A program must not return values outside of this enumeration, otherwise
 * packets may be dropped or processed in unexpected ways in future versions.
 */
enum Result {
    /** Pass packet (forward). */
    RESULT_PASS,
    /** Drop packet (discard). */
    RESULT_DROP,
    /** Send packet back. L2, L3, and L4 headers are adjusted automatically. */
    RESULT_BACK,
    /** Pass packet if overall rate is within limit, otherwise drop packet. */
    RESULT_LIMIT
};

/**
 * @brief ABI-safe, eBPF-friendly boolean type.
 *
 * Definitions of true and false from <stdbool.h> can be used with Bool.
 */
typedef uint64_t Bool;

/**
 * @brief Time in seconds: Unix timestamp or duration.
 *
 * @see time_sec() to get current time.
 */
typedef uint32_t Time;

/**
 * @brief IPv4 header.
 *
 * Access the header as follows:
 * @code
 * struct IpHeader* ip = packet_network_header(ctx);
 * @endcode
 *
 * @note This structure is for advanced users. Prefer packet_flow() to get addresses.
 * Prefer packet_transport_payload() to access payload and its length.
 *
 * @note If you modify the fields of this header and return RESULT_PASS
 * from the filter, call set_packet_mangled() to update checksums.
 *
 * @see packet_network_proto() to test if IPv4 header is present.
 * @see https://en.wikipedia.org/wiki/IPv4#Packet_structure
 * @see https://tools.ietf.org/html/rfc791
 */
struct IpHeader {
    uint32_t ip_hl : 4; /* 0     header length */
    uint32_t ip_v : 4;  /*       version == 4 */
    uint8_t ip_tos;     /* 1     type of service */
    uint16_t ip_len;    /* 2-3   total length */
    uint16_t ip_id;     /* 4-5   packet ID */
    uint16_t ip_off;    /* 6-7   fragmentation offset */
    uint8_t ip_ttl;     /* 8     time to live */
    uint8_t ip_p;       /* 9     protocol ID */
    uint16_t ip_sum;    /* 10-11 header checksum */
    uint32_t ip_src;    /* 12-15 source address */
    uint32_t ip_dst;    /* 16-19 destination address */
};

/**
 * @brief UDP header.
 *
 * Access this header after checking transport protocol:
 * @code
 * if (packet_transport_proto(ctx) == IP_PROTO_UDP) {
 *     struct UdpHeader* udp = packet_transport_header(ctx);
 *     ...
 * }
 * @endcode
 *
 * @note This structure is for advanced users. Prefer packet_flow() to get ports.
 * Prefer packet_transport_payload() to access payload and its length.
 *
 * @note If you modify the fields of this header and return RESULT_PASS
 * from the filter, call set_packet_mangled() to update checksums.
 *
 * @see https://en.wikipedia.org/wiki/User_Datagram_Protocol#UDP_datagram_structure
 * @see https://tools.ietf.org/html/rfc768
 */
struct UdpHeader {
    uint16_t uh_sport; /* 0-1 source port */
    uint16_t uh_dport; /* 2-3 destination port */
    uint16_t uh_ulen;  /* 4-5 UDP length */
    uint16_t uh_sum;   /* 6-7 checksum */
};

/**
 * @brief TCP header.
 *
 * Access this header after checking transport protocol:
 * @code
 * if (packet_transport_proto(ctx) == IP_PROTO_TCP) {
 *     struct TcpHeader* tcp = packet_transport_header(ctx);
 *     ...
 * }
 * @endcode
 *
 * @note This structure is for advanced users. Prefer packet_flow() to get ports.
 * Prefer packet_transport_payload() to access payload and its length.
 *
 * @note If you modify the fields of this header and return RESULT_PASS
 * from the filter, call set_packet_mangled() to update checksums.
 *
 * @see https://en.wikipedia.org/wiki/Transmission_Control_Protocol#TCP_segment_structure
 * @see https://tools.ietf.org/html/rfc793
 */
struct TcpHeader {
    uint16_t th_sport;      /* 0-1   source port */
    uint16_t th_dport;      /* 2-3   destination port */
    uint32_t th_seq;        /* 4-7   sequence number */
    uint32_t th_ack;        /* 8-11  acknowledgement number */
    uint32_t th_flags2 : 4; /* 12    more flags */
    uint32_t th_off : 4;    /*       data offset in words */
    uint8_t th_flags;       /* 13    flags */
    uint16_t th_win;        /* 14-15 window */
    uint16_t th_sum;        /* 16-17 checksum */
    uint16_t th_urp;        /* 18-19 urgent pointer */
};

/**
 * @brief Ethernet frame type codes.
 * @see packet_network_proto() for an example use of these values.
 */
enum EtherType {
    ETHER_TYPE_IP = 0x0800,    /**< Internet Protocol version 4 */
    ETHER_TYPE_ARP = 0x0806,   /**< Address Resolution Protocol */
    ETHER_TYPE_8021Q = 0x8100, /**< IEEE 802.1q (VLAN) */
    ETHER_TYPE_IP6 = 0x86DD    /**< Internet Protocol version 6 */
};

/**
 * @brief IPv4 and IPv6 transport protocol codes.
 * @see packet_transport_proto() for an example use of these values.
 * @see https://www.iana.org/assignments/protocol-numbers/protocol-numbers.xhtml
 */
enum IpProto {
    IP_PROTO_ICMP = 1, /**< Internet Control Message Protocol */
    IP_PROTO_TCP = 6,  /**< Transmission Control Protocol */
    IP_PROTO_UDP = 17  /**< User Datagram Protocol */
};

/**
 * @brief TCP flags.
 *
 * Combinations of these flags are used for TcpHeader::th_flags.
 *
 * URG, ECE, and CWR may be combined with any other flags,
 * and PUSH is often combined with ACK, but is never mandatory.
 * So this is the wrong way to check for ACK segments:
 * @code
 * if (tcp->th_flags == TCP_FLAG_ACK) {
 *     // WRONG: will not run for ACK+PUSH, which is very common
 * }
 * @endcode
 *
 * To check if ACK flag is present in a combination:
 * @code
 * if (tcp->th_flags & TCP_FLAG_ACK) { ... }
 * @endcode
 *
 * To switch on the flags:
 * @code
 * switch (tcp->th_flags & (TCP_FLAG_SYN | TCP_FLAG_ACK)) {
 * case TCP_FLAG_SYN: ...; break;
 * case TCP_FLAG_ACK: ...; break;
 * case TCP_FLAG_SYN | TCP_FLAG_ACK: ...; break;
 * }
 * @endcode
 */
enum TcpFlags {
    TCP_FLAG_FIN = 0x01,
    TCP_FLAG_SYN = 0x02,
    TCP_FLAG_RST = 0x04,
    TCP_FLAG_PUSH = 0x08,
    TCP_FLAG_ACK = 0x10,
    TCP_FLAG_URG = 0x20,
    TCP_FLAG_ECE = 0x40,
    TCP_FLAG_CWR = 0x80
};

/**
 * @brief TCP option codes.
 *
 * Options start immediately after TCP header and are limited to 40 bytes
 * (a TCP header is at most 60 bytes, 20 of which are the fixed part),
 * so a loop over them may be UNROLL'ed. Options must be padded to 4 bytes.
 *
 * @see https://www.iana.org/assignments/tcp-parameters/tcp-parameters.xhtml#tcp-parameters-1
 */
enum TcpOption {
    TCP_OPT_EOL = 0,       /**< End of Option List */
    TCP_OPT_NOP = 1,       /**< No option (used for padding) */
    TCP_OPT_MAXSEG = 2,    /**< Maximum segment size (MSS) */
    TCP_OPT_WSCALE = 3,    /**< Window scaling factor */
    TCP_OPT_SACK_PERM = 4, /**< Selective acknowledgement permitted */
    TCP_OPT_SACK = 5,      /**< Selective acknowledgement */
    TCP_OPT_TIMESTAMPS = 8 /**< Timestamps */
};

/**
 * @brief Packet flow information.
 *
 * All fields are in network byte order, that is, port numbers must be swapped
 * to check against values. For example, test if port is 1234 as follows:
 * @code
 * struct Flow flow;
 * packet_flow(ctx, &flow);
 * if (flow.dport == bswap16(1234)) { ... }
 * @endcode
 */
struct Flow {
    uint32_t saddr;   /**< Source IPv4 address. */
    uint32_t daddr;   /**< Destination IPv4 address. */
    uint16_t sport;   /**< Source port. */
    uint16_t dport;   /**< Destination port. */
    uint32_t padding; /**< Not used and zero-filled by API. */
};

/**
 * @brief Get packet flow information, including source and destination.
 *
 * This is simpler and more efficient than analysing L3 and L4 headers.
 *
 * @param ctx  Filter context.
 * @param info Flow structure filled in by the call; its padding is zeroed.
 */
void packet_flow(Context ctx, struct Flow* info);

/**
 * @brief Get packet network protocol code, e.g. IPv4.
 *
 * Return values are in network byte order, so the proper check for IPv4 is:
 * @code
 * if (packet_network_proto(ctx) == bswap16(ETHER_TYPE_IP)) { ... }
 * @endcode
 *
 * @see EtherType
 *
 * @note Currently, all packets passed to BPF are IPv4,
 * so this function is not needed in most cases.
 */
uint16_t packet_network_proto(Context ctx);

/**
 * @brief Get packet network header, e.g. IPv4 header.
 *
 * Programs may access up to MAX_PAYLOAD_LENGTH bytes starting from
 * the header, so that variable-length IP options and/or encapsulation
 * headers can be processed. However, usually casting to a fixed-length
 * IpHeader is sufficient.
 *
 * @note This function always succeeds.
 * Use packet_network_proto() to check if expected header is present.
 *
 * @note If you only need addresses, consider using packet_flow().
 *
 * @see IpHeader
 */
void* packet_network_header(Context ctx);

/**
 * @brief Get packet transport protocol code, e.g. TCP, UDP, or ICMP.
 *
 * Use IpProto constants to match on return value as follows:
 * @code
 * if (packet_transport_proto(ctx) == IP_PROTO_TCP) { ... }
 * @endcode
 */
uint8_t packet_transport_proto(Context ctx);

/**
 * @brief Get packet transport header, e.g. TCP header.
 *
 * Programs may access up to MAX_PAYLOAD_LENGTH bytes starting
 * from the header, so that variable-length TCP options and/or
 * encapsulation headers can be processed. However, usually casting
 * to a fixed-length TcpHeader or UdpHeader is sufficient.
 *
 * @note This function always succeeds.
 * Use packet_transport_proto() to check if expected header is present.
 *
 * @note If you only need ports, consider using packet_flow().
 *
 * @see TcpHeader
 * @see UdpHeader
 */
void* packet_transport_header(Context ctx);

/**
 * @brief Get packet transport payload for TCP or UDP.
 *
 * For protocols other than TCP and UDP, data starting from L4 header
 * is returned. Use packet_transport_proto() to check the protocol.
 *
 * Example:
 * @code
 * uint16_t length = 0;
 * uint8_t* payload = packet_transport_payload(ctx, &length);
 * @endcode
 *
 * @param ctx    Filter context.
 * @param length Receives the length of the returned data.
 */
void* packet_transport_payload(Context ctx, uint16_t* length);

/**
 * @brief Set new packet transport payload length (max 1400).
 *
 * @note Packet is marked as mangled and its headers are updated automatically.
 * @note This function takes effect after the filter has finished.
 */
void set_packet_length(Context ctx, uint16_t length);

/**
 * @brief Set number of bytes to strip from the beginning of transport payload.
 *
 * @note If you use this function, you must also call set_packet_length()
 * to set up the length for the rest of the packet, otherwise it will
 * be truncated, i.e. its length set to 0.
 * @note Packet is marked as mangled and its headers are updated automatically.
 * @note This function takes effect after the filter has finished.
 */
void set_packet_offset(Context ctx, uint16_t offset);

/**
 * @brief Convert response packet to an empty TCP SYN+ACK with syncookie.
 *
 * TCP sequence number is set to a cookie value recognized by syncookie_check().
 * IPv4 "don't fragment" flag and TCP options are kept intact.
 *
 * @note Packet is marked as mangled and its headers are updated automatically.
 * @note This function takes effect after the filter has finished.
 * @note This function only takes effect when the filter returns RESULT_BACK.
 */
void set_packet_syncookie(Context ctx);

/**
 * @brief Mark the packet as mangled by the program.
 *
 * A call to this function is required when changes are made directly
 * to the packet headers or data using pointers and the filter returns
 * RESULT_PASS. If the filter returns RESULT_BACK, packet is always mangled.
 * If the filter returns RESULT_DROP, mangling is meaningless.
 *
 * @note Changes to length fields of network and transport headers
 * will not be picked up by this function, use set_packet_length(),
 * otherwise the packet will be malformed.
 *
 * @note This function takes effect after the filter has finished.
 */
void set_packet_mangled(Context ctx);

/**
 * @brief Add packet source address to temporary blacklist.
 *
 * @note This function overrides effect of set_src_whitelisted().
 */
void set_src_blacklisted(Context ctx, Time duration);

/**
 * @brief Add packet source address to temporary whitelist.
 *
 * @note This function overrides effect of set_src_blacklisted().
 */
void set_src_whitelisted(Context ctx, Time duration);

/**
 * @brief Key to a record in the program-wide table.
 *
 * Key 0 is reserved. It is forbidden to save values by key 0
 * and records are never found by this key.
 *
 * @see TableRecord for table description.
 */
typedef uint64_t TableKey;

/**
 * @brief Value of a record in the program-wide table.
 *
 * @see TableRecord for table description.
 */
typedef uint64_t TableValue;

/**
 * @brief Record in the program-wide table.
 *
 * Each program has a table to store 1M records of 64-bit data by 64-bit keys.
 * Key 0 is reserved and forbidden. Values are loaded and stored atomically.
 * Last update time is maintained for each record and stale records are purged,
 * so there is no removal operation. To keep a record in the table, call
 * table_get() or table_put().
 */
struct TableRecord {
    TableValue value; /**< User data. */
    Time update_time; /**< Update time maintained by system. */
    uint32_t padding; /**< Reserved, not used by API. */
};

/**
 * @brief Lookup value in the table by key.
 *
 * @note If any found record must also be kept in the table, prefer table_get().
 */
Bool table_find(Context ctx, TableKey key, struct TableRecord* record);

/**
 * @brief Lookup value in the table by key and modify record update time.
 *
 * Returned table record holds previous update time, which can be used
 * to measure time elapsed since last table_get() or table_put() call
 * on the record.
 */
Bool table_get(Context ctx, TableKey key, struct TableRecord* record);

/**
 * @brief Update value in the table, creating a new record if needed.
 *
 * If key is not found and new record cannot be created, return false,
 * otherwise return true.
 *
 * @note This function modifies record update time even if the value does not change.
 */
Bool table_put(Context ctx, TableKey key, TableValue value);

/** @brief Get number of records in the table. */
uint64_t table_size(Context ctx);

/** @brief Cookie value. */
typedef uint32_t Cookie;

/**
 * @brief Make a cookie value for use as a sequence number in a SYN+ACK packet.
 *
 * Values generated by this function are recognized by syncookie_check().
 * This function is for custom processing of TCP handshake.
 *
 * @note Use set_packet_syncookie() instead to avoid dealing with TCP header.
 */
Cookie syncookie_make(Context ctx);

/**
 * @brief Check if packet is a TCP ACK carrying a SYN cookie.
 *
 * This function checks if packet's TCP acknowledgement number matches
 * the one generated by syncookie_make() or set_packet_syncookie() recently.
 * It fails for non-IPv4 and non-TCP packets.
 *
 * This function can be used not only to implement SYN cookie protection,
 * but also to recognize first data packets in a TCP session. For example,
 * if seqnum_offset is 0, the first chunk of data is recognized. To recognize
 * data starting from the 10-th octet, set seqnum_offset to 10. Likewise,
 * set acknum_offset to 20 to recognize a packet acknowledging 20 first octets
 * sent back to the client.
 */
Bool syncookie_check(Context ctx, uint32_t seqnum_offset, uint32_t acknum_offset);

/**
 * @brief Make a generic cookie value based on random seed, current time,
 * and flow fields that identify the client.
 *
 * Use cookie_check() to check the cookie returned in response.
 *
 * Zero unneeded flow fields to ignore them, e.g. zero source port if
 * clients are likely to change it when sending response. Always make sure
 * struct Flow padding is zero (packet_flow() does this automatically).
 *
 * The following code puts a cookie in the beginning of the response:
 * @code
 * uint16_t length = 0;
 * uint32_t* cookie = packet_transport_payload(ctx, &length);
 *
 * struct Flow flow;
 * packet_flow(ctx, &flow);
 *
 * flow.sport = 0;
 * *cookie = cookie_make(ctx, &flow);
 * set_packet_length(ctx, sizeof(*cookie));
 * return RESULT_BACK;
 * @endcode
 */
Cookie cookie_make(Context ctx, const struct Flow* id);

/**
 * @brief Check if a generic cookie matches the flow and has not expired.
 *
 * This function checks if the cookie was generated by cookie_make()
 * recently with the same exact flow.
 * @see cookie_make() for comments on filling the Flow structure.
 *
 * The following code checks for a cookie in the beginning of payload:
 * @code
 * uint16_t length = 0;
 * uint32_t* cookie = packet_transport_payload(ctx, &length);
 *
 * struct Flow flow;
 * packet_flow(ctx, &flow);
 *
 * flow.sport = 0;
 * if (cookie_check(ctx, &flow, *cookie)) {
 *     ...
 * }
 * @endcode
 */
Bool cookie_check(Context ctx, const struct Flow* id, Cookie cookie);

/**
 * @brief Get a pointer to read-only program parameters.
 *
 * Programs typically use parameters to allow keys, seeds, and thresholds
 * to be configurable by user without recompiling the source.
 * The entire space of MAX_PARAMETERS_LENGTH bytes is always available
 * to the program. Bytes not supplied by user are zero-filled.
 */
const void* parameters_get(Context ctx);

/**
 * @brief Compute CRC32 (Castagnoli) of a 32-bit value.
 *
 * Note that crc32(A concatenated with B, init) == crc32(B, crc32(A, init)).
 * For example, the following code computes CRC32 of a seed concatenated
 * with source IP (initial value 0x814141AB does not matter).
 * @code
 * struct Flow flow;
 * packet_flow(ctx, &flow);
 * uint32_t crc = hash_crc32_u32(flow.saddr, hash_crc32_u32(seed, 0x814141AB));
 * @endcode
 *
 * @param value Value to hash.
 * @param init  Initial CRC value, e.g. the CRC of preceding data.
 *
 * @see hash_crc32_data() for dynamically-sized data.
 */
uint32_t hash_crc32_u32(uint32_t value, uint32_t init);

/**
 * @brief Compute CRC32 (Castagnoli) of a 64-bit value.
 *
 * @see hash_crc32_u32() for additional notes and examples.
 */
uint32_t hash_crc32_u64(uint64_t value, uint32_t init);

/**
 * @brief Compute CRC32 (Castagnoli) over [data; end).
 *
 * Suppose a protocol requires first four bytes of payload to be CRC32
 * of the remaining payload, using initial value 0xFFFFFFFF.
 * A filter can verify packets as follows:
 * @code
 * static const uint32_t INIT = 0xFFFFFFFF;
 *
 * uint16_t length = 0;
 * uint8_t* payload = packet_transport_payload(ctx, &length);
 * uint32_t crc = *(uint32_t*)payload;
 * uint8_t* rest = payload + sizeof(crc);
 * uint8_t* rest_end = payload + length;
 *
 * if (length > MAX_PAYLOAD_LENGTH) {
 *     // Never happens, but required to validate that rest_end
 *     // does not point beyond packet data.
 *     return RESULT_DROP;
 * }
 *
 * if (hash_crc32_data(rest, rest_end, INIT) == crc) {
 *     return RESULT_PASS;
 * }
 * @endcode
 *
 * @param data First byte of the range.
 * @param end  One past the last byte of the range.
 * @param init Initial CRC value.
 *
 * @see MAX_PAYLOAD_LENGTH for detailed explanation why it's checked.
 * @see hash_crc32_u32() for a simpler alternative for 32-bit data.
 * @see hash_crc32_u64() for a simpler alternative for 64-bit data.
 */
uint32_t hash_crc32_data(const void* data, const void* end, uint32_t init);

/** @brief Get wallclock time in seconds. */
Time time_sec(Context ctx);

/** @brief Generate a pseudo-random, non cryptographically-secure value. */
uint64_t rand64(void);

/**
 * @brief Change byte order of a 16-bit value.
 *
 * @param value Value to convert.
 * @return @p value with its two bytes swapped.
 */
LOCAL uint16_t
bswap16(uint16_t value) {
    return __builtin_bswap16(value);
}

/**
 * @brief Change byte order of a 32-bit value.
 *
 * @param value Value to convert.
 * @return @p value with its four bytes reversed.
 */
LOCAL uint32_t
bswap32(uint32_t value) {
    return __builtin_bswap32(value);
}

#ifdef __cplusplus
} // namespace mitigator
#endif