From f72675beb5835b4ac31d7476de1580be767209d8 Mon Sep 17 00:00:00 2001 From: swananan Date: Thu, 31 Aug 2023 00:12:07 +0800 Subject: [PATCH] feature: support pcre2 --- .travis.yml | 26 +++++++++++++----------- lib/resty/core/regex.lua | 43 ++++++++++++++++++++++++++++++---------- t/re-base.t | 25 +++++++++++++++++------ t/re-gmatch.t | 10 +++++++--- t/re-match.t | 7 +++++-- t/re-opt.t | 9 +++++++-- t/stream/re-base.t | 27 +++++++++++++++++++------ t/stream/re-gmatch.t | 10 +++++++--- t/stream/re-match.t | 7 +++++-- t/stream/re-opt.t | 9 +++++++-- 10 files changed, 125 insertions(+), 48 deletions(-) --- a/nginx-mod-lua-resty-core/.travis.yml +++ b/nginx-mod-lua-resty-core/.travis.yml @@ -34,9 +34,13 @@ env: - LUA_INCLUDE_DIR=$LUAJIT_INC - LUA_CMODULE_DIR=/lib - PCRE_VER=8.45 + - PCRE2_VER=10.37 - PCRE_PREFIX=/opt/pcre + - PCRE2_PREFIX=/opt/pcre2 - PCRE_LIB=$PCRE_PREFIX/lib + - PCRE2_LIB=$PCRE2_PREFIX/lib - PCRE_INC=$PCRE_PREFIX/include + - PCRE2_INC=$PCRE2_PREFIX/include - OPENSSL_PREFIX=/opt/ssl - OPENSSL_LIB=$OPENSSL_PREFIX/lib - OPENSSL_INC=$OPENSSL_PREFIX/include @@ -45,7 +49,7 @@ env: - TEST_NGINX_RANDOMIZE=1 - LUACHECK_VER=0.21.1 matrix: - - NGINX_VERSION=1.25.1 OPENSSL_VER=1.1.1u OPENSSL_PATCH_VER=1.1.1f + - NGINX_VERSION=1.25.1 OPENSSL_VER=1.1.1u OPENSSL_PATCH_VER=1.1.1f USE_PCRE2=Y - NGINX_VERSION=1.21.4 OPENSSL_VER=1.1.1u OPENSSL_PATCH_VER=1.1.1f services: @@ -61,11 +65,12 @@ before_install: install: - if [ ! -d download-cache ]; then mkdir download-cache; fi - if [ ! -f download-cache/openssl-$OPENSSL_VER.tar.gz ]; then wget -P download-cache https://www.openssl.org/source/openssl-$OPENSSL_VER.tar.gz || wget -P download-cache https://www.openssl.org/source/old/${OPENSSL_VER//[a-z]/}/openssl-$OPENSSL_VER.tar.gz; fi - - if [ ! -f download-cache/pcre-$PCRE_VER.tar.gz ]; then wget -P download-cache http://ftp.cs.stanford.edu/pub/exim/pcre/pcre-$PCRE_VER.tar.gz; fi + - if [ "$USE_PCRE2" != "Y" ] && [ ! 
-f download-cache/pcre-$PCRE_VER.tar.gz ]; then wget -P download-cache http://ftp.cs.stanford.edu/pub/exim/pcre/pcre-$PCRE_VER.tar.gz; fi + - if [ "$USE_PCRE2" = "Y" ] && [ ! -f download-cache/pcre2-$PCRE2_VER.tar.gz ]; then wget -P download-cache https://downloads.sourceforge.net/project/pcre/pcre2/${PCRE2_VER}/pcre2-${PCRE2_VER}.tar.gz; fi - git clone https://github.com/openresty/openresty.git ../openresty - git clone https://github.com/openresty/openresty-devel-utils.git - git clone https://github.com/simpl/ngx_devel_kit.git ../ndk-nginx-module - - git clone https://github.com/openresty/lua-nginx-module.git ../lua-nginx-module + - git clone https://github.com/swananan/lua-nginx-module.git -b support_pcre2 ../lua-nginx-module - git clone https://github.com/openresty/no-pool-nginx.git ../no-pool-nginx - git clone https://github.com/openresty/echo-nginx-module.git ../echo-nginx-module - git clone https://github.com/openresty/lua-resty-lrucache.git @@ -73,8 +78,8 @@ install: - git clone -b v2.1-agentzh https://github.com/openresty/luajit2.git luajit2 - git clone https://github.com/openresty/set-misc-nginx-module.git ../set-misc-nginx-module - git clone https://github.com/openresty/mockeagain.git - - git clone https://github.com/openresty/test-nginx.git - - git clone https://github.com/openresty/stream-lua-nginx-module.git ../stream-lua-nginx-module + - git clone https://github.com/swananan/test-nginx.git -b support_pcre2 + - git clone https://github.com/swananan/stream-lua-nginx-module.git -b support_pcre2 ../stream-lua-nginx-module script: - cd luajit2/ @@ -89,12 +94,8 @@ script: - make -j$JOBS > build.log 2>&1 || (cat build.log && exit 1) - sudo make PATH=$PATH install_sw > build.log 2>&1 || (cat build.log && exit 1) - cd ../mockeagain/ && make CC=$CC -j$JOBS && cd .. 
- - tar zxf download-cache/pcre-$PCRE_VER.tar.gz - - cd pcre-$PCRE_VER/ - - ./configure --prefix=$PCRE_PREFIX --enable-jit --enable-utf --enable-unicode-properties > build.log 2>&1 || (cat build.log && exit 1) - - make -j$JOBS > build.log 2>&1 || (cat build.log && exit 1) - - sudo PATH=$PATH make install > build.log 2>&1 || (cat build.log && exit 1) - - cd .. + - if [ "$USE_PCRE2" != "Y" ]; then tar zxf download-cache/pcre-$PCRE_VER.tar.gz && cd pcre-$PCRE_VER/ && (./configure --prefix=$PCRE_PREFIX --enable-jit --enable-utf --enable-unicode-properties > build.log 2>&1 || (cat build.log && exit 1)) && (make -j$JOBS > build.log 2>&1 || (cat build.log && exit 1)) && (sudo PATH=$PATH make install > build.log 2>&1 || (cat build.log && exit 1)) && cd ..; fi + - if [ "$USE_PCRE2" = "Y" ]; then tar zxf download-cache/pcre2-$PCRE2_VER.tar.gz && cd pcre2-$PCRE2_VER/ && (./configure --prefix=$PCRE2_PREFIX --enable-jit --enable-utf > build.log 2>&1 || (cat build.log && exit 1)) && (make -j$JOBS > build.log 2>&1 || (cat build.log && exit 1)) && (sudo PATH=$PATH make install > build.log 2>&1 || (cat build.log && exit 1)) && cd ..; fi - export PATH=$PWD/work/nginx/sbin:$PWD/openresty-devel-utils:$PATH - export LD_PRELOAD=$PWD/mockeagain/mockeagain.so - export LD_LIBRARY_PATH=$PWD/mockeagain:$LD_LIBRARY_PATH @@ -104,7 +105,8 @@ script: - export disable_pcre2=--without-pcre2 - answer=`util/ver-ge "$NGINX_VERSION" 1.25.1` - if [ "$OPENSSL_VER" = "1.1.0l" ] || [ "$answer" = "N" ]; then add_http3_module=""; fi - - if [ "$answer" = "N" ]; then disable_pcre2=""; fi + - if [ "$answer" = "N" ] || [ "$USE_PCRE2" = "Y" ]; then disable_pcre2=""; fi + - if [ "$USE_PCRE2" = "Y" ]; then PCRE_INC=$PCRE2_INC; PCRE_LIB=$PCRE2_LIB; fi - ngx-build $NGINX_VERSION --with-ipv6 $disable_pcre2 $add_http3_module --with-http_realip_module --with-http_ssl_module --with-pcre-jit --with-cc-opt="-I$OPENSSL_INC -I$PCRE_INC" --with-ld-opt="-L$OPENSSL_LIB -Wl,-rpath,$OPENSSL_LIB -L$PCRE_LIB -Wl,-rpath,$PCRE_LIB" --add-module=../ndk-nginx-module 
--add-module=../echo-nginx-module --add-module=../set-misc-nginx-module --add-module=../headers-more-nginx-module --add-module=../lua-nginx-module --with-debug --with-stream_ssl_module --with-stream --with-ipv6 --add-module=../stream-lua-nginx-module > build.log 2>&1 || (cat build.log && exit 1) - nginx -V - ldd `which nginx`|grep -E 'luajit|ssl|pcre' --- a/nginx-mod-lua-resty-core/lib/resty/core/regex.lua +++ b/nginx-mod-lua-resty-core/lib/resty/core/regex.lua @@ -82,7 +82,7 @@ if not pcall(function() pcre_ver = ffi_s end -local MAX_ERR_MSG_LEN = 128 +local MAX_ERR_MSG_LEN = 256 local FLAG_COMPILE_ONCE = 0x01 @@ -102,6 +102,7 @@ local PCRE_DUPNAMES = 0x0080000 local PCRE_JAVASCRIPT_COMPAT = 0x2000000 +-- PCRE2_ERROR_NOMATCH uses the same value local PCRE_ERROR_NOMATCH = -1 @@ -135,22 +136,44 @@ local ngx_lua_ffi_script_eval_data -- TODO: improve this workaround when PCRE allows for unspecifying the MAP_JIT -- option. local no_jit_in_init +local pcre_ver_num -if jit.os == "OSX" then - local maj, min = string.match(pcre_ver, "^(%d+)%.(%d+)") - if maj and min then - local pcre_ver_num = tonumber(maj .. min) - - if pcre_ver_num >= 843 then - no_jit_in_init = true - end +local maj, min = string.match(pcre_ver, "^(%d+)%.(%d+)") +if maj and min then + pcre_ver_num = tonumber(maj .. min) +end - else +if jit.os == "OSX" then + if pcre_ver_num == nil then -- assume this version is faulty as well no_jit_in_init = true + + -- PCRE2 is also subject to this issue on macOS + elseif pcre_ver_num >= 843 then + no_jit_in_init = true end end +-- pcre2 (an unparsable version string keeps the PCRE option values) +if pcre_ver_num and pcre_ver_num > 845 then + -- option + PCRE_CASELESS = 0x00000008 + PCRE_MULTILINE = 0x00000400 + PCRE_DOTALL = 0x00000020 + PCRE_EXTENDED = 0x00000080 + PCRE_ANCHORED = 0x80000000 + PCRE_UTF8 = 0x00080000 + PCRE_DUPNAMES = 0x00000040 + -- In PCRE2, the PCRE_JAVASCRIPT_COMPAT option has been split into + -- independent functional options PCRE2_ALT_BSUX, PCRE2_ALLOW_EMPTY_CLASS, + -- and PCRE2_MATCH_UNSET_BACKREF. 
+ local PCRE2_ALT_BSUX = 0x00000002 + local PCRE2_ALLOW_EMPTY_CLASS = 0x00000001 + local PCRE2_MATCH_UNSET_BACKREF = 0x00000200 + PCRE_JAVASCRIPT_COMPAT = bor(PCRE2_ALT_BSUX, PCRE2_ALLOW_EMPTY_CLASS) + PCRE_JAVASCRIPT_COMPAT = bor(PCRE2_MATCH_UNSET_BACKREF, + PCRE_JAVASCRIPT_COMPAT) +end if subsystem == 'http' then ffi.cdef[[ --- a/nginx-mod-lua-resty-core/t/re-base.t +++ b/nginx-mod-lua-resty-core/t/re-base.t @@ -26,8 +26,11 @@ __DATA__ } --- request GET /re ---- response_body -error: pcre_compile() failed: missing ) in "(abc" +--- response_body eval +$Test::Nginx::Util::PcreVersion == 2 ? +"error: pcre2_compile() failed: missing closing parenthesis in \"(abc\"\n" +: +"error: pcre_compile() failed: missing ) in \"(abc\"\n" --- no_error_log [error] @@ -63,8 +66,11 @@ error: pcre_compile() failed: missing ) } --- request GET /t ---- response_body_like chop -error: pcre_exec\(\) failed: -10 +--- response_body eval +$Test::Nginx::Util::PcreVersion == 2 ? +"error: pcre_exec\(\) failed: -4\n" +: +"error: pcre_exec\(\) failed: -10\n" --- no_error_log [error] @@ -128,6 +134,7 @@ probe process("$LIBPCRE_PATH").function( printf("exec opts: %x\n", $options) } +# TODO: PCRE2 use different option values from PCRE --- stap_out compile opts: 800 exec opts: 0 @@ -172,8 +179,14 @@ end --- request GET /re ---- response_body -error: pcre_exec() failed: -8 +--- response_body eval +# lua_regex_match_limit uses pcre_extra->match_limit in the PCRE, +# but PCRE2 replaces this with pcre2_set_match_limit interface, +# which has different effects. +$Test::Nginx::Util::PcreVersion == 2 ? +"failed to match\n" +: +"error: pcre_exec() failed: -8\n" --- a/nginx-mod-lua-resty-core/t/re-gmatch.t +++ b/nginx-mod-lua-resty-core/t/re-gmatch.t @@ -446,9 +446,13 @@ matched: nil } --- request GET /re ---- response_body -error: pcre_exec() failed: -10 -not matched +--- response_body eval +# PCRE2_ERROR_UTF8_ERR2 (-4) +# PCRE_ERROR_BADUTF8 (-10) +$Test::Nginx::Util::PcreVersion == 2 ? 
+"error: pcre_exec\(\) failed: -4\nnot matched\n" +: +"error: pcre_exec\(\) failed: -10\nnot matched\n" --- no_error_log [error] --- a/nginx-mod-lua-resty-core/t/re-match.t +++ b/nginx-mod-lua-resty-core/t/re-match.t @@ -306,8 +306,11 @@ NYI } --- request GET /re ---- response_body_like chop -error: pcre_compile\(\) failed: two named subpatterns have the same name +--- response_body eval +$Test::Nginx::Util::PcreVersion == 2 ? +"error: pcre2_compile\(\) failed: two named subpatterns have the same name \(PCRE2_DUPNAMES not set\) in \"\(\?[a-z])\(\?[a-z]+\), [0-9]+\" at \"[a-z]+\), [0-9]+\"\n" +: +"error: pcre_compile\(\) failed: two named subpatterns have the same name in \"\(\?[a-z])\(\?[a-z]+\), [0-9]+\" at \">[a-z]+\), [0-9]+\"\n" --- error_log eval qr/\[TRACE\s+\d+/ --- a/nginx-mod-lua-resty-core/t/re-opt.t +++ b/nginx-mod-lua-resty-core/t/re-opt.t @@ -39,8 +39,13 @@ __DATA__ } --- request GET /re ---- response_body -error: pcre_exec() failed: -27 +--- response_body eval +# PCRE2_ERROR_JIT_STACKLIMIT (-46) +# PCRE_ERROR_JIT_STACKLIMIT (-27) +$Test::Nginx::Util::PcreVersion == 2 ? +"error: pcre_exec\(\) failed: -46\n" +: +"error: pcre_exec\(\) failed: -27\n" --- no_error_log [error] --- timeout: 10 --- a/nginx-mod-lua-resty-core/t/stream/re-base.t +++ b/nginx-mod-lua-resty-core/t/stream/re-base.t @@ -22,8 +22,11 @@ __DATA__ ngx.say("error: ", err) end } ---- stream_response -error: pcre_compile() failed: missing ) in "(abc" +--- stream_response eval +$Test::Nginx::Util::PcreVersion == 2 ? +"error: pcre2_compile() failed: missing closing parenthesis in \"(abc\"\n" +: +"error: pcre_compile() failed: missing ) in \"(abc\"\n" --- no_error_log [error] @@ -55,12 +58,15 @@ error: pcre_compile() failed: missing ) ngx.say("not matched") end } ---- stream_response_like chop -error: pcre_exec\(\) failed: -10 +--- stream_response eval +$Test::Nginx::Util::PcreVersion == 2 ? 
+"error: pcre_exec\(\) failed: -4\n" +: +"error: pcre_exec\(\) failed: -10\n" --- no_error_log [error] === TEST 3: UTF-8 mode without UTF-8 sequence checks @@ -114,6 +120,7 @@ probe process("$LIBPCRE_PATH").function( printf("exec opts: %x\n", $options) } +# TODO: PCRE2 use different option values from PCRE --- stap_out compile opts: 800 exec opts: 0 @@ -152,8 +159,14 @@ if not res then return end ---- stream_response -error: pcre_exec() failed: -8 +--- stream_response eval +# lua_regex_match_limit uses pcre_extra->match_limit in the PCRE, +# but PCRE2 replaces this with pcre2_set_match_limit interface, +# which has different effects. +$Test::Nginx::Util::PcreVersion == 2 ? +"failed to match\n" +: +"error: pcre_exec() failed: -8\n" --- a/nginx-mod-lua-resty-core/t/stream/re-gmatch.t +++ b/nginx-mod-lua-resty-core/t/stream/re-gmatch.t @@ -394,9 +394,13 @@ matched: nil ngx.say("not matched") end } ---- stream_response -error: pcre_exec() failed: -10 -not matched +--- stream_response eval +# PCRE2_ERROR_UTF8_ERR2 (-4) +# PCRE_ERROR_BADUTF8 (-10) +$Test::Nginx::Util::PcreVersion == 2 ? 
+"error: pcre2_compile\(\) failed: two named subpatterns have the same name \(PCRE2_DUPNAMES not set\) in \"\(\?[a-z])\(\?[a-z]+\), [0-9]+\" at \"[a-z]+\), [0-9]+\"\n" +: +"error: pcre_compile\(\) failed: two named subpatterns have the same name in \"\(\?[a-z])\(\?[a-z]+\), [0-9]+\" at \">[a-z]+\), [0-9]+\"\n" --- error_log eval qr/\[TRACE\s+\d+/ --- a/nginx-mod-lua-resty-core/t/stream/re-opt.t +++ b/nginx-mod-lua-resty-core/t/stream/re-opt.t @@ -36,8 +36,13 @@ __DATA__ ngx.say("not matched!") end } ---- stream_response -error: pcre_exec() failed: -27 +--- stream_response eval +# PCRE2_ERROR_JIT_STACKLIMIT (-46) +# PCRE_ERROR_JIT_STACKLIMIT (-27) +$Test::Nginx::Util::PcreVersion == 2 ? +"error: pcre_exec\(\) failed: -46\n" +: +"error: pcre_exec\(\) failed: -27\n" --- no_error_log [error] --- timeout: 10