refactor cmake, rm absl/linsndfile, add strings unittest (#2765)

pull/2854/head
Hui Zhang 2 years ago committed by GitHub
parent 869f4267d5
commit f8caaf46c8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,19 +1,28 @@
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
project(paddlespeech VERSION 0.1)
set(CMAKE_PROJECT_INCLUDE_BEFORE "${CMAKE_CURRENT_SOURCE_DIR}/cmake/EnableCMP0048.cmake")
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
include(system)
# Ninja Generator will set CMAKE_BUILD_TYPE to Debug
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE
"Release"
CACHE
STRING
"Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
FORCE)
endif()
project(paddlespeech VERSION 0.1)
set(CMAKE_VERBOSE_MAKEFILE on)
# set std-14
set(CMAKE_CXX_STANDARD 14)
# cmake dir
set(speechx_cmake_dir ${PROJECT_SOURCE_DIR}/cmake)
# Modules
list(APPEND CMAKE_MODULE_PATH ${speechx_cmake_dir})
include(FetchContent)
include(ExternalProject)
@ -33,6 +42,7 @@ SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} --std=c++14 -pthread -fPIC -O3 -Wall
###############################################################################
option(TEST_DEBUG "option for debug" OFF)
option(USE_PROFILING "enable c++ profling" OFF)
option(WITH_TESTING "unit test" ON)
option(USING_U2 "compile u2 model." ON)
option(USING_DS2 "compile with ds2 model." ON)
@ -42,26 +52,10 @@ option(USING_GPU "u2 compute on GPU." OFF)
###############################################################################
# Include third party
###############################################################################
# example for include third party
# FetchContent_MakeAvailable was not added until CMake 3.14
# FetchContent_MakeAvailable()
# include_directories()
# gflags
include(gflags)
# glog
include(glog)
# gtest
include(gtest)
# ABSEIL-CPP
include(absl)
# libsndfile
include(libsndfile)
# boost
# include(boost) # not work
set(boost_SOURCE_DIR ${fc_patch}/boost-src)
@ -87,6 +81,11 @@ add_dependencies(openfst gflags glog)
# paddle lib
include(paddleinference)
# gtest
if(WITH_TESTING)
include(gtest) # download, build, install gtest
endif()
# python/pybind11/threads
find_package(Threads REQUIRED)
# https://cmake.org/cmake/help/latest/module/FindPython3.html#module:FindPython3
@ -165,15 +164,6 @@ message(STATUS PADDLE_LIB_DIRS= ${PADDLE_LIB_DIRS})
###############################################################################
# Add local library
###############################################################################
# system lib
#find_package()
# if dir have CmakeLists.txt
#add_subdirectory(speechx)
# if dir do not have CmakeLists.txt
#add_library(lib_name STATIC file.cc)
#target_link_libraries(lib_name item0 item1)
#add_dependencies(lib_name depend-target)
set(SPEECHX_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/speechx)
add_subdirectory(speechx)

@ -113,3 +113,11 @@ apt-get install gfortran-8
4. `Undefined reference to '_gfortran_concat_string'`
using gcc 8.2, gfortran 8.2.
5. `./boost/python/detail/wrap_python.hpp:57:11: fatal error: pyconfig.h: No such file or directory`
```
apt-get install python3-dev
```
For more information, please see [here](https://github.com/okfn/piati/issues/65).

@ -1,3 +1,4 @@
include(FetchContent)
FetchContent_Declare(
gtest
@ -6,4 +7,9 @@ FetchContent_Declare(
)
FetchContent_MakeAvailable(gtest)
include_directories(${gtest_BINARY_DIR} ${gtest_SOURCE_DIR}/src)
include_directories(${gtest_BINARY_DIR} ${gtest_SOURCE_DIR}/src)
if(WITH_TESTING)
enable_testing()
endif()

@ -25,3 +25,5 @@ ExternalProject_Add(openfst
)
link_directories(${openfst_PREFIX_DIR}/lib)
include_directories(${openfst_PREFIX_DIR}/include)
message(STATUS "OpenFST inc dir: ${openfst_PREFIX_DIR}/include")
message(STATUS "OpenFST lib dir: ${openfst_PREFIX_DIR}/lib")

@ -0,0 +1,106 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Detects the OS and sets appropriate variables.
# CMAKE_SYSTEM_NAME only gives us a coarse-grained name of the OS CMake is
# building for, but a finer-grained host system name such as centos is needed
# in some scenarios to customize the build for a particular system.
#
# for instance, protobuf libs path is <install_dir>/lib64
# on CentOS, but <install_dir>/lib on other systems.
# Flag every UNIX-like host other than Apple platforms as LINUX.
if(NOT APPLE AND UNIX)
  set(LINUX TRUE)
endif()
# Determine HOST_SYSTEM and, where it can be parsed, HOST_SYSTEM_VERSION.
#   Windows : HOST_SYSTEM is "win32".
#   macOS   : HOST_SYSTEM is "macosx"; the version comes from `sw_vers`.
#   other   : HOST_SYSTEM is derived from /etc/issue or /etc/redhat-release,
#             falling back to CMAKE_SYSTEM_NAME when nothing matches.
if(WIN32)
  set(HOST_SYSTEM "win32")
elseif(APPLE)
  set(HOST_SYSTEM "macosx")
  # exec_program() is deprecated; execute_process() is its replacement.
  execute_process(
    COMMAND sw_vers -productVersion
    OUTPUT_VARIABLE HOST_SYSTEM_VERSION
    OUTPUT_STRIP_TRAILING_WHITESPACE)
  # Keep only "<major>.<minor>". The dot is escaped so it matches literally.
  string(REGEX MATCH "[0-9]+\\.[0-9]+" MACOS_VERSION "${HOST_SYSTEM_VERSION}")
  # ENV{...} without a leading '$' tests whether the environment variable
  # exists. "$ENV{...}" would expand its value first and test a CMake variable
  # of that name instead.
  if(NOT DEFINED ENV{MACOSX_DEPLOYMENT_TARGET})
    # Set cache variable - end user may change this during ccmake or cmake-gui configure.
    # The value is quoted so an empty MACOS_VERSION cannot shift the arguments.
    set(CMAKE_OSX_DEPLOYMENT_TARGET
        "${MACOS_VERSION}"
        CACHE
          STRING
          "Minimum OS X version to target for deployment (at runtime); newer APIs weak linked. Set to empty string for default value."
    )
  endif()
  # Append rather than overwrite so linker flags that are already set survive.
  string(APPEND CMAKE_EXE_LINKER_FLAGS
         " -framework CoreFoundation -framework Security")
else()
  if(EXISTS "/etc/issue")
    file(READ "/etc/issue" LINUX_ISSUE)
    # The first distribution name found in the banner wins.
    if(LINUX_ISSUE MATCHES "CentOS")
      set(HOST_SYSTEM "centos")
    elseif(LINUX_ISSUE MATCHES "Debian")
      set(HOST_SYSTEM "debian")
    elseif(LINUX_ISSUE MATCHES "Ubuntu")
      set(HOST_SYSTEM "ubuntu")
    elseif(LINUX_ISSUE MATCHES "Red Hat")
      set(HOST_SYSTEM "redhat")
    elseif(LINUX_ISSUE MATCHES "Fedora")
      set(HOST_SYSTEM "fedora")
    endif()
    # Take the first dotted number sequence in the banner as the version.
    string(REGEX MATCH "(([0-9]+)\\.)+([0-9]+)" HOST_SYSTEM_VERSION
                 "${LINUX_ISSUE}")
  endif()

  # /etc/redhat-release is checked as well and can override the result above
  # with "centos".
  if(EXISTS "/etc/redhat-release")
    file(READ "/etc/redhat-release" LINUX_ISSUE)
    if(LINUX_ISSUE MATCHES "CentOS")
      set(HOST_SYSTEM "centos")
    endif()
  endif()

  # Nothing recognised: fall back to CMake's coarse-grained system name.
  if(NOT HOST_SYSTEM)
    set(HOST_SYSTEM ${CMAKE_SYSTEM_NAME})
  endif()
endif()
# Ask CMake for the number of logical cores on the host, then report both the
# detected host system and the core count at configure time.
cmake_host_system_information(RESULT CPU_CORES QUERY NUMBER_OF_LOGICAL_CORES)

mark_as_advanced(HOST_SYSTEM)
mark_as_advanced(CPU_CORES)

message(STATUS "Found Paddle host system: ${HOST_SYSTEM}, version: ${HOST_SYSTEM_VERSION}")
message(STATUS "Found Paddle host system's CPU: ${CPU_CORES} cores")
# Logging switches for external dependencies, kept as one list of
# <option> <0|1> pairs (1 = wrap that step in a script and log its output).
# NOTE(review): presumably expanded inside ExternalProject_Add() calls in the
# other cmake modules - confirm against the callers.
set(EXTERNAL_PROJECT_LOG_ARGS
    LOG_DOWNLOAD 0 # download step
    LOG_UPDATE 1 # update step
    LOG_CONFIGURE 1 # configure step
    LOG_BUILD 0 # build step
    LOG_TEST 1 # test step
    LOG_INSTALL 0 # install step
)

@ -19,7 +19,7 @@ if (USING_U2)
endif()
add_library(decoder STATIC ${srcs})
target_link_libraries(decoder PUBLIC kenlm utils fst frontend nnet kaldi-decoder absl::strings)
target_link_libraries(decoder PUBLIC kenlm utils fst frontend nnet kaldi-decoder)
# test
if (USING_DS2)

@ -17,7 +17,6 @@
#include "decoder/ctc_prefix_beam_search_decoder.h"
#include "absl/strings/str_join.h"
#include "base/common.h"
#include "decoder/ctc_beam_search_opt.h"
#include "decoder/ctc_prefix_beam_search_score.h"

@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/str_split.h"
#include "base/common.h"
#include "decoder/ctc_prefix_beam_search_decoder.h"
#include "frontend/audio/data_cache.h"

@ -9,7 +9,7 @@ if(USING_U2)
endif()
add_library(nnet STATIC ${srcs})
target_link_libraries(nnet absl::strings)
target_link_libraries(nnet utils)
if(USING_U2)
target_compile_options(nnet PUBLIC ${PADDLE_COMPILE_FLAGS})

@ -14,7 +14,7 @@
#include "nnet/ds2_nnet.h"
#include "absl/strings/str_split.h"
#include "utils/strings.h"
namespace ppspeech {
@ -26,16 +26,16 @@ using std::vector;
void PaddleNnet::InitCacheEncouts(const ModelOptions& opts) {
std::vector<std::string> cache_names;
cache_names = absl::StrSplit(opts.cache_names, ",");
cache_names = StrSplit(opts.cache_names, ",");
std::vector<std::string> cache_shapes;
cache_shapes = absl::StrSplit(opts.cache_shape, ",");
cache_shapes = StrSplit(opts.cache_shape, ",");
assert(cache_shapes.size() == cache_names.size());
cache_encouts_.clear();
cache_names_idx_.clear();
for (size_t i = 0; i < cache_shapes.size(); i++) {
std::vector<std::string> tmp_shape;
tmp_shape = absl::StrSplit(cache_shapes[i], "-");
tmp_shape = StrSplit(cache_shapes[i], "-");
std::vector<int> cur_shape;
std::transform(tmp_shape.begin(),
tmp_shape.end(),
@ -74,8 +74,8 @@ PaddleNnet::PaddleNnet(const ModelOptions& opts) : opts_(opts) {
LOG(INFO) << "start to check the predictor input and output names";
LOG(INFO) << "input names: " << opts.input_names;
LOG(INFO) << "output names: " << opts.output_names;
vector<string> input_names_vec = absl::StrSplit(opts.input_names, ",");
vector<string> output_names_vec = absl::StrSplit(opts.output_names, ",");
std::vector<std::string> input_names_vec = StrSplit(opts.input_names, ",");
std::vector<std::string> output_names_vec = StrSplit(opts.output_names, ",");
paddle_infer::Predictor* predictor = GetPredictor();

@ -10,4 +10,4 @@ target_link_libraries(websocket_server_main PUBLIC fst websocket ${DEPS})
add_executable(websocket_client_main ${CMAKE_CURRENT_SOURCE_DIR}/websocket_client_main.cc)
target_include_directories(websocket_client_main PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
target_link_libraries(websocket_client_main PUBLIC fst websocket ${DEPS})
target_link_libraries(websocket_client_main PUBLIC fst websocket ${DEPS})

@ -2,4 +2,18 @@
add_library(utils
file_utils.cc
math.cc
)
strings.cc
)
# Unit test for the string helpers in the utils library. It is configured only
# when the WITH_TESTING option is enabled.
if(WITH_TESTING)
  enable_testing()

  add_executable(strings_test strings_test.cc)
  # Link the test dependencies on the target itself. A directory-wide
  # link_libraries() would also attach gtest/gmock to every target created
  # later in this directory.
  target_link_libraries(strings_test PRIVATE utils gtest_main gmock)

  add_test(NAME strings_test COMMAND strings_test)
endif()

@ -15,13 +15,14 @@
// limitations under the License.
#include "utils/math.h"
#include "base/basic_types.h"
#include <algorithm>
#include <cmath>
#include <queue>
#include <utility>
#include "base/common.h"
#include <string>
#include <vector>
namespace ppspeech {

@ -0,0 +1,50 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sstream>
#include "utils/strings.h"
namespace ppspeech {
// Splits `str` at every character that appears in `delim`; each character of
// `delim` is an independent separator.
//
// str:               text to split.
// delim:             NUL-terminated set of separator characters.
// omit_empty_string: when true, empty pieces (from leading, trailing or
//                    consecutive separators, or from an empty `str`) are
//                    dropped; when false they are returned as "".
//
// Returns the pieces in the order they occur in `str`.
std::vector<std::string> StrSplit(const std::string& str,
                                  const char* delim,
                                  bool omit_empty_string) {
    std::vector<std::string> outs;
    // Positions are kept in the string's own unsigned size type so that
    // std::string::npos is never narrowed into a signed int.
    const std::string::size_type end = str.size();
    std::string::size_type start = 0;
    while (true) {
        const std::string::size_type found = str.find_first_of(delim, start);
        // The last piece runs to the end of the string.
        const std::string::size_type stop =
            (found == std::string::npos) ? end : found;
        if (!omit_empty_string || stop != start) {
            outs.push_back(str.substr(start, stop - start));
        }
        if (found == std::string::npos) {
            break;
        }
        start = found + 1;
    }
    return outs;
}
// Concatenates the elements of `strs`, inserting `delim` between consecutive
// elements. No delimiter is written before the first or after the last one.
//
// strs:  pieces to join; an empty vector yields an empty string.
// delim: NUL-terminated separator text.
//
// Returns the joined string.
std::string StrJoin(const std::vector<std::string>& strs, const char* delim) {
    std::stringstream ss;
    // std::size_t matches the type of strs.size() and, unlike the POSIX-only
    // ssize_t, is available on every standard C++ toolchain.
    for (std::size_t i = 0; i < strs.size(); ++i) {
        if (i > 0) {
            ss << delim;
        }
        ss << strs[i];
    }
    return ss.str();
}
} // namespace ppspeech

@ -0,0 +1,26 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#include <string>
namespace ppspeech {
// Splits `str` at every character contained in `delim` and returns the pieces
// in order. With `omit_empty_string` set (the default), empty pieces are
// dropped from the result.
std::vector<std::string> StrSplit(const std::string& str, const char *delim, bool omit_empty_string=true);
// Joins the elements of `strs` into one string, placing `delim` between
// consecutive elements.
std::string StrJoin(const std::vector<std::string>& strs, const char* delim);
} // namespace ppspeech

@ -0,0 +1,35 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "utils/strings.h"
#include <gtest/gtest.h>
#include <gmock/gmock.h>
// Splitting on a space/tab separator set yields the two words of the input.
TEST(StringTest, StrSplitTest) {
    const std::string input = "hello world";
    const std::vector<std::string> pieces = ppspeech::StrSplit(input, " \t");
    EXPECT_THAT(pieces, ::testing::ElementsAre("hello", "world"));
}
// StrJoin places the delimiter only between elements. Single-element and
// empty inputs therefore contain no delimiter at all.
TEST(StringTest, StrJoinTest) {
    const std::vector<std::string> ins{"hello", "world"};
    // EXPECT_EQ states the equality check directly instead of relying on the
    // implicit literal-to-matcher conversion of EXPECT_THAT.
    EXPECT_EQ(ppspeech::StrJoin(ins, " "), "hello world");

    const std::vector<std::string> single{"hello"};
    EXPECT_EQ(ppspeech::StrJoin(single, " "), "hello");

    const std::vector<std::string> none;
    EXPECT_EQ(ppspeech::StrJoin(none, " "), "");
}
Loading…
Cancel
Save